1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
3 ; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

; Tests that sign-extend-then-add on fixed-length vectors is lowered to the RVV
; widening-add instructions (vwadd.vv / vwadd.vx / vwadd.wv), across element
; widths i8->i16 through i32->i64 and vector lengths from LMUL=mf8 up to
; splits beyond LMUL=8, including splat-scalar and mixed-width operand cases.
5 define <2 x i16> @vwadd_v2i16(ptr %x, ptr %y) {
6 ; CHECK-LABEL: vwadd_v2i16:
8 ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
9 ; CHECK-NEXT: vle8.v v9, (a0)
10 ; CHECK-NEXT: vle8.v v10, (a1)
11 ; CHECK-NEXT: vwadd.vv v8, v9, v10
13 %a = load <2 x i8>, ptr %x
14 %b = load <2 x i8>, ptr %y
15 %c = sext <2 x i8> %a to <2 x i16>
16 %d = sext <2 x i8> %b to <2 x i16>
17 %e = add <2 x i16> %c, %d
21 define <4 x i16> @vwadd_v4i16(ptr %x, ptr %y) {
22 ; CHECK-LABEL: vwadd_v4i16:
24 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
25 ; CHECK-NEXT: vle8.v v9, (a0)
26 ; CHECK-NEXT: vle8.v v10, (a1)
27 ; CHECK-NEXT: vwadd.vv v8, v9, v10
29 %a = load <4 x i8>, ptr %x
30 %b = load <4 x i8>, ptr %y
31 %c = sext <4 x i8> %a to <4 x i16>
32 %d = sext <4 x i8> %b to <4 x i16>
33 %e = add <4 x i16> %c, %d
37 define <2 x i32> @vwadd_v2i32(ptr %x, ptr %y) {
38 ; CHECK-LABEL: vwadd_v2i32:
40 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
41 ; CHECK-NEXT: vle16.v v9, (a0)
42 ; CHECK-NEXT: vle16.v v10, (a1)
43 ; CHECK-NEXT: vwadd.vv v8, v9, v10
45 %a = load <2 x i16>, ptr %x
46 %b = load <2 x i16>, ptr %y
47 %c = sext <2 x i16> %a to <2 x i32>
48 %d = sext <2 x i16> %b to <2 x i32>
49 %e = add <2 x i32> %c, %d
53 define <8 x i16> @vwadd_v8i16(ptr %x, ptr %y) {
54 ; CHECK-LABEL: vwadd_v8i16:
56 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
57 ; CHECK-NEXT: vle8.v v9, (a0)
58 ; CHECK-NEXT: vle8.v v10, (a1)
59 ; CHECK-NEXT: vwadd.vv v8, v9, v10
61 %a = load <8 x i8>, ptr %x
62 %b = load <8 x i8>, ptr %y
63 %c = sext <8 x i8> %a to <8 x i16>
64 %d = sext <8 x i8> %b to <8 x i16>
65 %e = add <8 x i16> %c, %d
69 define <4 x i32> @vwadd_v4i32(ptr %x, ptr %y) {
70 ; CHECK-LABEL: vwadd_v4i32:
72 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
73 ; CHECK-NEXT: vle16.v v9, (a0)
74 ; CHECK-NEXT: vle16.v v10, (a1)
75 ; CHECK-NEXT: vwadd.vv v8, v9, v10
77 %a = load <4 x i16>, ptr %x
78 %b = load <4 x i16>, ptr %y
79 %c = sext <4 x i16> %a to <4 x i32>
80 %d = sext <4 x i16> %b to <4 x i32>
81 %e = add <4 x i32> %c, %d
85 define <2 x i64> @vwadd_v2i64(ptr %x, ptr %y) {
86 ; CHECK-LABEL: vwadd_v2i64:
88 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
89 ; CHECK-NEXT: vle32.v v9, (a0)
90 ; CHECK-NEXT: vle32.v v10, (a1)
91 ; CHECK-NEXT: vwadd.vv v8, v9, v10
93 %a = load <2 x i32>, ptr %x
94 %b = load <2 x i32>, ptr %y
95 %c = sext <2 x i32> %a to <2 x i64>
96 %d = sext <2 x i32> %b to <2 x i64>
97 %e = add <2 x i64> %c, %d
101 define <16 x i16> @vwadd_v16i16(ptr %x, ptr %y) {
102 ; CHECK-LABEL: vwadd_v16i16:
104 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
105 ; CHECK-NEXT: vle8.v v10, (a0)
106 ; CHECK-NEXT: vle8.v v11, (a1)
107 ; CHECK-NEXT: vwadd.vv v8, v10, v11
109 %a = load <16 x i8>, ptr %x
110 %b = load <16 x i8>, ptr %y
111 %c = sext <16 x i8> %a to <16 x i16>
112 %d = sext <16 x i8> %b to <16 x i16>
113 %e = add <16 x i16> %c, %d
117 define <8 x i32> @vwadd_v8i32(ptr %x, ptr %y) {
118 ; CHECK-LABEL: vwadd_v8i32:
120 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
121 ; CHECK-NEXT: vle16.v v10, (a0)
122 ; CHECK-NEXT: vle16.v v11, (a1)
123 ; CHECK-NEXT: vwadd.vv v8, v10, v11
125 %a = load <8 x i16>, ptr %x
126 %b = load <8 x i16>, ptr %y
127 %c = sext <8 x i16> %a to <8 x i32>
128 %d = sext <8 x i16> %b to <8 x i32>
129 %e = add <8 x i32> %c, %d
133 define <4 x i64> @vwadd_v4i64(ptr %x, ptr %y) {
134 ; CHECK-LABEL: vwadd_v4i64:
136 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
137 ; CHECK-NEXT: vle32.v v10, (a0)
138 ; CHECK-NEXT: vle32.v v11, (a1)
139 ; CHECK-NEXT: vwadd.vv v8, v10, v11
141 %a = load <4 x i32>, ptr %x
142 %b = load <4 x i32>, ptr %y
143 %c = sext <4 x i32> %a to <4 x i64>
144 %d = sext <4 x i32> %b to <4 x i64>
145 %e = add <4 x i64> %c, %d
149 define <32 x i16> @vwadd_v32i16(ptr %x, ptr %y) {
150 ; CHECK-LABEL: vwadd_v32i16:
152 ; CHECK-NEXT: li a2, 32
153 ; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma
154 ; CHECK-NEXT: vle8.v v12, (a0)
155 ; CHECK-NEXT: vle8.v v14, (a1)
156 ; CHECK-NEXT: vwadd.vv v8, v12, v14
158 %a = load <32 x i8>, ptr %x
159 %b = load <32 x i8>, ptr %y
160 %c = sext <32 x i8> %a to <32 x i16>
161 %d = sext <32 x i8> %b to <32 x i16>
162 %e = add <32 x i16> %c, %d
166 define <16 x i32> @vwadd_v16i32(ptr %x, ptr %y) {
167 ; CHECK-LABEL: vwadd_v16i32:
169 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
170 ; CHECK-NEXT: vle16.v v12, (a0)
171 ; CHECK-NEXT: vle16.v v14, (a1)
172 ; CHECK-NEXT: vwadd.vv v8, v12, v14
174 %a = load <16 x i16>, ptr %x
175 %b = load <16 x i16>, ptr %y
176 %c = sext <16 x i16> %a to <16 x i32>
177 %d = sext <16 x i16> %b to <16 x i32>
178 %e = add <16 x i32> %c, %d
182 define <8 x i64> @vwadd_v8i64(ptr %x, ptr %y) {
183 ; CHECK-LABEL: vwadd_v8i64:
185 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
186 ; CHECK-NEXT: vle32.v v12, (a0)
187 ; CHECK-NEXT: vle32.v v14, (a1)
188 ; CHECK-NEXT: vwadd.vv v8, v12, v14
190 %a = load <8 x i32>, ptr %x
191 %b = load <8 x i32>, ptr %y
192 %c = sext <8 x i32> %a to <8 x i64>
193 %d = sext <8 x i32> %b to <8 x i64>
194 %e = add <8 x i64> %c, %d
198 define <64 x i16> @vwadd_v64i16(ptr %x, ptr %y) {
199 ; CHECK-LABEL: vwadd_v64i16:
201 ; CHECK-NEXT: li a2, 64
202 ; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma
203 ; CHECK-NEXT: vle8.v v16, (a0)
204 ; CHECK-NEXT: vle8.v v20, (a1)
205 ; CHECK-NEXT: vwadd.vv v8, v16, v20
207 %a = load <64 x i8>, ptr %x
208 %b = load <64 x i8>, ptr %y
209 %c = sext <64 x i8> %a to <64 x i16>
210 %d = sext <64 x i8> %b to <64 x i16>
211 %e = add <64 x i16> %c, %d
215 define <32 x i32> @vwadd_v32i32(ptr %x, ptr %y) {
216 ; CHECK-LABEL: vwadd_v32i32:
218 ; CHECK-NEXT: li a2, 32
219 ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
220 ; CHECK-NEXT: vle16.v v16, (a0)
221 ; CHECK-NEXT: vle16.v v20, (a1)
222 ; CHECK-NEXT: vwadd.vv v8, v16, v20
224 %a = load <32 x i16>, ptr %x
225 %b = load <32 x i16>, ptr %y
226 %c = sext <32 x i16> %a to <32 x i32>
227 %d = sext <32 x i16> %b to <32 x i32>
228 %e = add <32 x i32> %c, %d
232 define <16 x i64> @vwadd_v16i64(ptr %x, ptr %y) {
233 ; CHECK-LABEL: vwadd_v16i64:
235 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
236 ; CHECK-NEXT: vle32.v v16, (a0)
237 ; CHECK-NEXT: vle32.v v20, (a1)
238 ; CHECK-NEXT: vwadd.vv v8, v16, v20
240 %a = load <16 x i32>, ptr %x
241 %b = load <16 x i32>, ptr %y
242 %c = sext <16 x i32> %a to <16 x i64>
243 %d = sext <16 x i32> %b to <16 x i64>
244 %e = add <16 x i64> %c, %d
248 define <128 x i16> @vwadd_v128i16(ptr %x, ptr %y) nounwind {
249 ; CHECK-LABEL: vwadd_v128i16:
251 ; CHECK-NEXT: addi sp, sp, -16
252 ; CHECK-NEXT: csrr a2, vlenb
253 ; CHECK-NEXT: slli a2, a2, 4
254 ; CHECK-NEXT: sub sp, sp, a2
255 ; CHECK-NEXT: li a2, 128
256 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
257 ; CHECK-NEXT: vle8.v v8, (a0)
258 ; CHECK-NEXT: addi a0, sp, 16
259 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
260 ; CHECK-NEXT: vle8.v v0, (a1)
261 ; CHECK-NEXT: li a0, 64
262 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
263 ; CHECK-NEXT: vslidedown.vx v16, v8, a0
264 ; CHECK-NEXT: vslidedown.vx v8, v0, a0
265 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
266 ; CHECK-NEXT: vwadd.vv v24, v16, v8
267 ; CHECK-NEXT: csrr a0, vlenb
268 ; CHECK-NEXT: slli a0, a0, 3
269 ; CHECK-NEXT: add a0, sp, a0
270 ; CHECK-NEXT: addi a0, a0, 16
271 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
272 ; CHECK-NEXT: addi a0, sp, 16
273 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
274 ; CHECK-NEXT: vwadd.vv v8, v16, v0
275 ; CHECK-NEXT: csrr a0, vlenb
276 ; CHECK-NEXT: slli a0, a0, 3
277 ; CHECK-NEXT: add a0, sp, a0
278 ; CHECK-NEXT: addi a0, a0, 16
279 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
280 ; CHECK-NEXT: csrr a0, vlenb
281 ; CHECK-NEXT: slli a0, a0, 4
282 ; CHECK-NEXT: add sp, sp, a0
283 ; CHECK-NEXT: addi sp, sp, 16
285 %a = load <128 x i8>, ptr %x
286 %b = load <128 x i8>, ptr %y
287 %c = sext <128 x i8> %a to <128 x i16>
288 %d = sext <128 x i8> %b to <128 x i16>
289 %e = add <128 x i16> %c, %d
293 define <64 x i32> @vwadd_v64i32(ptr %x, ptr %y) nounwind {
294 ; CHECK-LABEL: vwadd_v64i32:
296 ; CHECK-NEXT: addi sp, sp, -16
297 ; CHECK-NEXT: csrr a2, vlenb
298 ; CHECK-NEXT: slli a2, a2, 4
299 ; CHECK-NEXT: sub sp, sp, a2
300 ; CHECK-NEXT: li a2, 64
301 ; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma
302 ; CHECK-NEXT: vle16.v v8, (a0)
303 ; CHECK-NEXT: addi a0, sp, 16
304 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
305 ; CHECK-NEXT: vle16.v v0, (a1)
306 ; CHECK-NEXT: li a0, 32
307 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
308 ; CHECK-NEXT: vslidedown.vx v16, v8, a0
309 ; CHECK-NEXT: vslidedown.vx v8, v0, a0
310 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
311 ; CHECK-NEXT: vwadd.vv v24, v16, v8
312 ; CHECK-NEXT: csrr a0, vlenb
313 ; CHECK-NEXT: slli a0, a0, 3
314 ; CHECK-NEXT: add a0, sp, a0
315 ; CHECK-NEXT: addi a0, a0, 16
316 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
317 ; CHECK-NEXT: addi a0, sp, 16
318 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
319 ; CHECK-NEXT: vwadd.vv v8, v16, v0
320 ; CHECK-NEXT: csrr a0, vlenb
321 ; CHECK-NEXT: slli a0, a0, 3
322 ; CHECK-NEXT: add a0, sp, a0
323 ; CHECK-NEXT: addi a0, a0, 16
324 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
325 ; CHECK-NEXT: csrr a0, vlenb
326 ; CHECK-NEXT: slli a0, a0, 4
327 ; CHECK-NEXT: add sp, sp, a0
328 ; CHECK-NEXT: addi sp, sp, 16
330 %a = load <64 x i16>, ptr %x
331 %b = load <64 x i16>, ptr %y
332 %c = sext <64 x i16> %a to <64 x i32>
333 %d = sext <64 x i16> %b to <64 x i32>
334 %e = add <64 x i32> %c, %d
338 define <32 x i64> @vwadd_v32i64(ptr %x, ptr %y) nounwind {
339 ; CHECK-LABEL: vwadd_v32i64:
341 ; CHECK-NEXT: addi sp, sp, -16
342 ; CHECK-NEXT: csrr a2, vlenb
343 ; CHECK-NEXT: slli a2, a2, 4
344 ; CHECK-NEXT: sub sp, sp, a2
345 ; CHECK-NEXT: li a2, 32
346 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
347 ; CHECK-NEXT: vle32.v v8, (a0)
348 ; CHECK-NEXT: addi a0, sp, 16
349 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
350 ; CHECK-NEXT: vle32.v v0, (a1)
351 ; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma
352 ; CHECK-NEXT: vslidedown.vi v16, v8, 16
353 ; CHECK-NEXT: vslidedown.vi v8, v0, 16
354 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
355 ; CHECK-NEXT: vwadd.vv v24, v16, v8
356 ; CHECK-NEXT: csrr a0, vlenb
357 ; CHECK-NEXT: slli a0, a0, 3
358 ; CHECK-NEXT: add a0, sp, a0
359 ; CHECK-NEXT: addi a0, a0, 16
360 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
361 ; CHECK-NEXT: addi a0, sp, 16
362 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
363 ; CHECK-NEXT: vwadd.vv v8, v16, v0
364 ; CHECK-NEXT: csrr a0, vlenb
365 ; CHECK-NEXT: slli a0, a0, 3
366 ; CHECK-NEXT: add a0, sp, a0
367 ; CHECK-NEXT: addi a0, a0, 16
368 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
369 ; CHECK-NEXT: csrr a0, vlenb
370 ; CHECK-NEXT: slli a0, a0, 4
371 ; CHECK-NEXT: add sp, sp, a0
372 ; CHECK-NEXT: addi sp, sp, 16
374 %a = load <32 x i32>, ptr %x
375 %b = load <32 x i32>, ptr %y
376 %c = sext <32 x i32> %a to <32 x i64>
377 %d = sext <32 x i32> %b to <32 x i64>
378 %e = add <32 x i64> %c, %d
382 define <2 x i32> @vwadd_v2i32_v2i8(ptr %x, ptr %y) {
383 ; CHECK-LABEL: vwadd_v2i32_v2i8:
385 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
386 ; CHECK-NEXT: vle8.v v8, (a0)
387 ; CHECK-NEXT: vle8.v v9, (a1)
388 ; CHECK-NEXT: vsext.vf2 v10, v8
389 ; CHECK-NEXT: vsext.vf2 v11, v9
390 ; CHECK-NEXT: vwadd.vv v8, v10, v11
392 %a = load <2 x i8>, ptr %x
393 %b = load <2 x i8>, ptr %y
394 %c = sext <2 x i8> %a to <2 x i32>
395 %d = sext <2 x i8> %b to <2 x i32>
396 %e = add <2 x i32> %c, %d
400 define <4 x i32> @vwadd_v4i32_v4i8_v4i16(ptr %x, ptr %y) {
401 ; CHECK-LABEL: vwadd_v4i32_v4i8_v4i16:
403 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
404 ; CHECK-NEXT: vle8.v v8, (a0)
405 ; CHECK-NEXT: vle16.v v9, (a1)
406 ; CHECK-NEXT: vsext.vf2 v10, v8
407 ; CHECK-NEXT: vwadd.vv v8, v10, v9
409 %a = load <4 x i8>, ptr %x
410 %b = load <4 x i16>, ptr %y
411 %c = sext <4 x i8> %a to <4 x i32>
412 %d = sext <4 x i16> %b to <4 x i32>
413 %e = add <4 x i32> %c, %d
417 define <4 x i64> @vwadd_v4i64_v4i32_v4i8(ptr %x, ptr %y) {
418 ; CHECK-LABEL: vwadd_v4i64_v4i32_v4i8:
420 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
421 ; CHECK-NEXT: vle8.v v8, (a1)
422 ; CHECK-NEXT: vle32.v v10, (a0)
423 ; CHECK-NEXT: vsext.vf4 v11, v8
424 ; CHECK-NEXT: vwadd.vv v8, v10, v11
426 %a = load <4 x i32>, ptr %x
427 %b = load <4 x i8>, ptr %y
428 %c = sext <4 x i32> %a to <4 x i64>
429 %d = sext <4 x i8> %b to <4 x i64>
430 %e = add <4 x i64> %c, %d
434 define <2 x i16> @vwadd_vx_v2i16(ptr %x, i8 %y) {
435 ; CHECK-LABEL: vwadd_vx_v2i16:
437 ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
438 ; CHECK-NEXT: vle8.v v9, (a0)
439 ; CHECK-NEXT: vwadd.vx v8, v9, a1
441 %a = load <2 x i8>, ptr %x
442 %b = insertelement <2 x i8> poison, i8 %y, i32 0
443 %c = shufflevector <2 x i8> %b, <2 x i8> poison, <2 x i32> zeroinitializer
444 %d = sext <2 x i8> %a to <2 x i16>
445 %e = sext <2 x i8> %c to <2 x i16>
446 %f = add <2 x i16> %d, %e
450 define <4 x i16> @vwadd_vx_v4i16(ptr %x, i8 %y) {
451 ; CHECK-LABEL: vwadd_vx_v4i16:
453 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
454 ; CHECK-NEXT: vle8.v v9, (a0)
455 ; CHECK-NEXT: vwadd.vx v8, v9, a1
457 %a = load <4 x i8>, ptr %x
458 %b = insertelement <4 x i8> poison, i8 %y, i32 0
459 %c = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer
460 %d = sext <4 x i8> %a to <4 x i16>
461 %e = sext <4 x i8> %c to <4 x i16>
462 %f = add <4 x i16> %d, %e
466 define <2 x i32> @vwadd_vx_v2i32(ptr %x, i16 %y) {
467 ; CHECK-LABEL: vwadd_vx_v2i32:
469 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
470 ; CHECK-NEXT: vle16.v v9, (a0)
471 ; CHECK-NEXT: vwadd.vx v8, v9, a1
473 %a = load <2 x i16>, ptr %x
474 %b = insertelement <2 x i16> poison, i16 %y, i32 0
475 %c = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32> zeroinitializer
476 %d = sext <2 x i16> %a to <2 x i32>
477 %e = sext <2 x i16> %c to <2 x i32>
478 %f = add <2 x i32> %d, %e
482 define <8 x i16> @vwadd_vx_v8i16(ptr %x, i8 %y) {
483 ; CHECK-LABEL: vwadd_vx_v8i16:
485 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
486 ; CHECK-NEXT: vle8.v v9, (a0)
487 ; CHECK-NEXT: vwadd.vx v8, v9, a1
489 %a = load <8 x i8>, ptr %x
490 %b = insertelement <8 x i8> poison, i8 %y, i32 0
491 %c = shufflevector <8 x i8> %b, <8 x i8> poison, <8 x i32> zeroinitializer
492 %d = sext <8 x i8> %a to <8 x i16>
493 %e = sext <8 x i8> %c to <8 x i16>
494 %f = add <8 x i16> %d, %e
498 define <4 x i32> @vwadd_vx_v4i32(ptr %x, i16 %y) {
499 ; CHECK-LABEL: vwadd_vx_v4i32:
501 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
502 ; CHECK-NEXT: vle16.v v9, (a0)
503 ; CHECK-NEXT: vwadd.vx v8, v9, a1
505 %a = load <4 x i16>, ptr %x
506 %b = insertelement <4 x i16> poison, i16 %y, i32 0
507 %c = shufflevector <4 x i16> %b, <4 x i16> poison, <4 x i32> zeroinitializer
508 %d = sext <4 x i16> %a to <4 x i32>
509 %e = sext <4 x i16> %c to <4 x i32>
510 %f = add <4 x i32> %d, %e
514 define <2 x i64> @vwadd_vx_v2i64(ptr %x, i32 %y) {
515 ; CHECK-LABEL: vwadd_vx_v2i64:
517 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
518 ; CHECK-NEXT: vle32.v v9, (a0)
519 ; CHECK-NEXT: vwadd.vx v8, v9, a1
521 %a = load <2 x i32>, ptr %x
522 %b = insertelement <2 x i32> poison, i32 %y, i64 0
523 %c = shufflevector <2 x i32> %b, <2 x i32> poison, <2 x i32> zeroinitializer
524 %d = sext <2 x i32> %a to <2 x i64>
525 %e = sext <2 x i32> %c to <2 x i64>
526 %f = add <2 x i64> %d, %e
530 define <16 x i16> @vwadd_vx_v16i16(ptr %x, i8 %y) {
531 ; CHECK-LABEL: vwadd_vx_v16i16:
533 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
534 ; CHECK-NEXT: vle8.v v10, (a0)
535 ; CHECK-NEXT: vwadd.vx v8, v10, a1
537 %a = load <16 x i8>, ptr %x
538 %b = insertelement <16 x i8> poison, i8 %y, i32 0
539 %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
540 %d = sext <16 x i8> %a to <16 x i16>
541 %e = sext <16 x i8> %c to <16 x i16>
542 %f = add <16 x i16> %d, %e
546 define <8 x i32> @vwadd_vx_v8i32(ptr %x, i16 %y) {
547 ; CHECK-LABEL: vwadd_vx_v8i32:
549 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
550 ; CHECK-NEXT: vle16.v v10, (a0)
551 ; CHECK-NEXT: vwadd.vx v8, v10, a1
553 %a = load <8 x i16>, ptr %x
554 %b = insertelement <8 x i16> poison, i16 %y, i32 0
555 %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
556 %d = sext <8 x i16> %a to <8 x i32>
557 %e = sext <8 x i16> %c to <8 x i32>
558 %f = add <8 x i32> %d, %e
562 define <4 x i64> @vwadd_vx_v4i64(ptr %x, i32 %y) {
563 ; CHECK-LABEL: vwadd_vx_v4i64:
565 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
566 ; CHECK-NEXT: vle32.v v10, (a0)
567 ; CHECK-NEXT: vwadd.vx v8, v10, a1
569 %a = load <4 x i32>, ptr %x
570 %b = insertelement <4 x i32> poison, i32 %y, i64 0
571 %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
572 %d = sext <4 x i32> %a to <4 x i64>
573 %e = sext <4 x i32> %c to <4 x i64>
574 %f = add <4 x i64> %d, %e
578 define <32 x i16> @vwadd_vx_v32i16(ptr %x, i8 %y) {
579 ; CHECK-LABEL: vwadd_vx_v32i16:
581 ; CHECK-NEXT: li a2, 32
582 ; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma
583 ; CHECK-NEXT: vle8.v v12, (a0)
584 ; CHECK-NEXT: vwadd.vx v8, v12, a1
586 %a = load <32 x i8>, ptr %x
587 %b = insertelement <32 x i8> poison, i8 %y, i32 0
588 %c = shufflevector <32 x i8> %b, <32 x i8> poison, <32 x i32> zeroinitializer
589 %d = sext <32 x i8> %a to <32 x i16>
590 %e = sext <32 x i8> %c to <32 x i16>
591 %f = add <32 x i16> %d, %e
595 define <16 x i32> @vwadd_vx_v16i32(ptr %x, i16 %y) {
596 ; CHECK-LABEL: vwadd_vx_v16i32:
598 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
599 ; CHECK-NEXT: vle16.v v12, (a0)
600 ; CHECK-NEXT: vwadd.vx v8, v12, a1
602 %a = load <16 x i16>, ptr %x
603 %b = insertelement <16 x i16> poison, i16 %y, i32 0
604 %c = shufflevector <16 x i16> %b, <16 x i16> poison, <16 x i32> zeroinitializer
605 %d = sext <16 x i16> %a to <16 x i32>
606 %e = sext <16 x i16> %c to <16 x i32>
607 %f = add <16 x i32> %d, %e
611 define <8 x i64> @vwadd_vx_v8i64(ptr %x, i32 %y) {
612 ; CHECK-LABEL: vwadd_vx_v8i64:
614 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
615 ; CHECK-NEXT: vle32.v v12, (a0)
616 ; CHECK-NEXT: vwadd.vx v8, v12, a1
618 %a = load <8 x i32>, ptr %x
619 %b = insertelement <8 x i32> poison, i32 %y, i64 0
620 %c = shufflevector <8 x i32> %b, <8 x i32> poison, <8 x i32> zeroinitializer
621 %d = sext <8 x i32> %a to <8 x i64>
622 %e = sext <8 x i32> %c to <8 x i64>
623 %f = add <8 x i64> %d, %e
627 define <64 x i16> @vwadd_vx_v64i16(ptr %x, i8 %y) {
628 ; CHECK-LABEL: vwadd_vx_v64i16:
630 ; CHECK-NEXT: li a2, 64
631 ; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma
632 ; CHECK-NEXT: vle8.v v16, (a0)
633 ; CHECK-NEXT: vwadd.vx v8, v16, a1
635 %a = load <64 x i8>, ptr %x
636 %b = insertelement <64 x i8> poison, i8 %y, i32 0
637 %c = shufflevector <64 x i8> %b, <64 x i8> poison, <64 x i32> zeroinitializer
638 %d = sext <64 x i8> %a to <64 x i16>
639 %e = sext <64 x i8> %c to <64 x i16>
640 %f = add <64 x i16> %d, %e
644 define <32 x i32> @vwadd_vx_v32i32(ptr %x, i16 %y) {
645 ; CHECK-LABEL: vwadd_vx_v32i32:
647 ; CHECK-NEXT: li a2, 32
648 ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
649 ; CHECK-NEXT: vle16.v v16, (a0)
650 ; CHECK-NEXT: vwadd.vx v8, v16, a1
652 %a = load <32 x i16>, ptr %x
653 %b = insertelement <32 x i16> poison, i16 %y, i32 0
654 %c = shufflevector <32 x i16> %b, <32 x i16> poison, <32 x i32> zeroinitializer
655 %d = sext <32 x i16> %a to <32 x i32>
656 %e = sext <32 x i16> %c to <32 x i32>
657 %f = add <32 x i32> %d, %e
661 define <16 x i64> @vwadd_vx_v16i64(ptr %x, i32 %y) {
662 ; CHECK-LABEL: vwadd_vx_v16i64:
664 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
665 ; CHECK-NEXT: vle32.v v16, (a0)
666 ; CHECK-NEXT: vwadd.vx v8, v16, a1
668 %a = load <16 x i32>, ptr %x
669 %b = insertelement <16 x i32> poison, i32 %y, i64 0
670 %c = shufflevector <16 x i32> %b, <16 x i32> poison, <16 x i32> zeroinitializer
671 %d = sext <16 x i32> %a to <16 x i64>
672 %e = sext <16 x i32> %c to <16 x i64>
673 %f = add <16 x i64> %d, %e
677 define <8 x i16> @vwadd_vx_v8i16_i8(ptr %x, ptr %y) {
678 ; CHECK-LABEL: vwadd_vx_v8i16_i8:
680 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
681 ; CHECK-NEXT: vle8.v v9, (a0)
682 ; CHECK-NEXT: lb a0, 0(a1)
683 ; CHECK-NEXT: vwadd.vx v8, v9, a0
685 %a = load <8 x i8>, ptr %x
687 %c = sext i8 %b to i16
688 %d = insertelement <8 x i16> poison, i16 %c, i32 0
689 %e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
690 %f = sext <8 x i8> %a to <8 x i16>
691 %g = add <8 x i16> %e, %f
695 define <8 x i16> @vwadd_vx_v8i16_i16(ptr %x, ptr %y) {
696 ; CHECK-LABEL: vwadd_vx_v8i16_i16:
698 ; CHECK-NEXT: lh a1, 0(a1)
699 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
700 ; CHECK-NEXT: vle8.v v9, (a0)
701 ; CHECK-NEXT: vmv.v.x v8, a1
702 ; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
703 ; CHECK-NEXT: vwadd.wv v8, v8, v9
705 %a = load <8 x i8>, ptr %x
706 %b = load i16, ptr %y
707 %d = insertelement <8 x i16> poison, i16 %b, i32 0
708 %e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
709 %f = sext <8 x i8> %a to <8 x i16>
710 %g = add <8 x i16> %e, %f
714 define <4 x i32> @vwadd_vx_v4i32_i8(ptr %x, ptr %y) {
715 ; CHECK-LABEL: vwadd_vx_v4i32_i8:
717 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
718 ; CHECK-NEXT: vle16.v v9, (a0)
719 ; CHECK-NEXT: lb a0, 0(a1)
720 ; CHECK-NEXT: vwadd.vx v8, v9, a0
722 %a = load <4 x i16>, ptr %x
724 %c = sext i8 %b to i32
725 %d = insertelement <4 x i32> poison, i32 %c, i32 0
726 %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
727 %f = sext <4 x i16> %a to <4 x i32>
728 %g = add <4 x i32> %e, %f
732 define <4 x i32> @vwadd_vx_v4i32_i16(ptr %x, ptr %y) {
733 ; CHECK-LABEL: vwadd_vx_v4i32_i16:
735 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
736 ; CHECK-NEXT: vle16.v v9, (a0)
737 ; CHECK-NEXT: lh a0, 0(a1)
738 ; CHECK-NEXT: vwadd.vx v8, v9, a0
740 %a = load <4 x i16>, ptr %x
741 %b = load i16, ptr %y
742 %c = sext i16 %b to i32
743 %d = insertelement <4 x i32> poison, i32 %c, i32 0
744 %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
745 %f = sext <4 x i16> %a to <4 x i32>
746 %g = add <4 x i32> %e, %f
750 define <4 x i32> @vwadd_vx_v4i32_i32(ptr %x, ptr %y) {
751 ; CHECK-LABEL: vwadd_vx_v4i32_i32:
753 ; CHECK-NEXT: lw a1, 0(a1)
754 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
755 ; CHECK-NEXT: vle16.v v9, (a0)
756 ; CHECK-NEXT: vmv.v.x v8, a1
757 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
758 ; CHECK-NEXT: vwadd.wv v8, v8, v9
760 %a = load <4 x i16>, ptr %x
761 %b = load i32, ptr %y
762 %d = insertelement <4 x i32> poison, i32 %b, i32 0
763 %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
764 %f = sext <4 x i16> %a to <4 x i32>
765 %g = add <4 x i32> %e, %f
769 define <2 x i64> @vwadd_vx_v2i64_i8(ptr %x, ptr %y) nounwind {
770 ; CHECK-LABEL: vwadd_vx_v2i64_i8:
772 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
773 ; CHECK-NEXT: vle32.v v9, (a0)
774 ; CHECK-NEXT: lb a0, 0(a1)
775 ; CHECK-NEXT: vwadd.vx v8, v9, a0
777 %a = load <2 x i32>, ptr %x
779 %c = sext i8 %b to i64
780 %d = insertelement <2 x i64> poison, i64 %c, i64 0
781 %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
782 %f = sext <2 x i32> %a to <2 x i64>
783 %g = add <2 x i64> %e, %f
787 define <2 x i64> @vwadd_vx_v2i64_i16(ptr %x, ptr %y) nounwind {
788 ; CHECK-LABEL: vwadd_vx_v2i64_i16:
790 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
791 ; CHECK-NEXT: vle32.v v9, (a0)
792 ; CHECK-NEXT: lh a0, 0(a1)
793 ; CHECK-NEXT: vwadd.vx v8, v9, a0
795 %a = load <2 x i32>, ptr %x
796 %b = load i16, ptr %y
797 %c = sext i16 %b to i64
798 %d = insertelement <2 x i64> poison, i64 %c, i64 0
799 %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
800 %f = sext <2 x i32> %a to <2 x i64>
801 %g = add <2 x i64> %e, %f
805 define <2 x i64> @vwadd_vx_v2i64_i32(ptr %x, ptr %y) nounwind {
806 ; CHECK-LABEL: vwadd_vx_v2i64_i32:
808 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
809 ; CHECK-NEXT: vle32.v v9, (a0)
810 ; CHECK-NEXT: lw a0, 0(a1)
811 ; CHECK-NEXT: vwadd.vx v8, v9, a0
813 %a = load <2 x i32>, ptr %x
814 %b = load i32, ptr %y
815 %c = sext i32 %b to i64
816 %d = insertelement <2 x i64> poison, i64 %c, i64 0
817 %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
818 %f = sext <2 x i32> %a to <2 x i64>
819 %g = add <2 x i64> %e, %f
823 define <2 x i64> @vwadd_vx_v2i64_i64(ptr %x, ptr %y) nounwind {
824 ; RV32-LABEL: vwadd_vx_v2i64_i64:
826 ; RV32-NEXT: addi sp, sp, -16
827 ; RV32-NEXT: lw a2, 4(a1)
828 ; RV32-NEXT: lw a1, 0(a1)
829 ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
830 ; RV32-NEXT: vle32.v v9, (a0)
831 ; RV32-NEXT: sw a2, 12(sp)
832 ; RV32-NEXT: sw a1, 8(sp)
833 ; RV32-NEXT: addi a0, sp, 8
834 ; RV32-NEXT: vlse64.v v8, (a0), zero
835 ; RV32-NEXT: vwadd.wv v8, v8, v9
836 ; RV32-NEXT: addi sp, sp, 16
839 ; RV64-LABEL: vwadd_vx_v2i64_i64:
841 ; RV64-NEXT: ld a1, 0(a1)
842 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
843 ; RV64-NEXT: vle32.v v9, (a0)
844 ; RV64-NEXT: vmv.v.x v8, a1
845 ; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
846 ; RV64-NEXT: vwadd.wv v8, v8, v9
848 %a = load <2 x i32>, ptr %x
849 %b = load i64, ptr %y
850 %d = insertelement <2 x i64> poison, i64 %b, i64 0
851 %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
852 %f = sext <2 x i32> %a to <2 x i64>
853 %g = add <2 x i64> %e, %f
857 define <2 x i32> @vwadd_v2i32_of_v2i8(ptr %x, ptr %y) {
858 ; CHECK-LABEL: vwadd_v2i32_of_v2i8:
860 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
861 ; CHECK-NEXT: vle8.v v8, (a0)
862 ; CHECK-NEXT: vle8.v v9, (a1)
863 ; CHECK-NEXT: vsext.vf2 v10, v8
864 ; CHECK-NEXT: vsext.vf2 v11, v9
865 ; CHECK-NEXT: vwadd.vv v8, v10, v11
867 %a = load <2 x i8>, ptr %x
868 %b = load <2 x i8>, ptr %y
869 %c = sext <2 x i8> %a to <2 x i32>
870 %d = sext <2 x i8> %b to <2 x i32>
871 %e = add <2 x i32> %c, %d
875 define <2 x i64> @vwadd_v2i64_of_v2i8(ptr %x, ptr %y) {
876 ; CHECK-LABEL: vwadd_v2i64_of_v2i8:
878 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
879 ; CHECK-NEXT: vle8.v v8, (a0)
880 ; CHECK-NEXT: vle8.v v9, (a1)
881 ; CHECK-NEXT: vsext.vf4 v10, v8
882 ; CHECK-NEXT: vsext.vf4 v11, v9
883 ; CHECK-NEXT: vwadd.vv v8, v10, v11
885 %a = load <2 x i8>, ptr %x
886 %b = load <2 x i8>, ptr %y
887 %c = sext <2 x i8> %a to <2 x i64>
888 %d = sext <2 x i8> %b to <2 x i64>
889 %e = add <2 x i64> %c, %d
893 define <2 x i64> @vwadd_v2i64_of_v2i16(ptr %x, ptr %y) {
894 ; CHECK-LABEL: vwadd_v2i64_of_v2i16:
896 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
897 ; CHECK-NEXT: vle16.v v8, (a0)
898 ; CHECK-NEXT: vle16.v v9, (a1)
899 ; CHECK-NEXT: vsext.vf2 v10, v8
900 ; CHECK-NEXT: vsext.vf2 v11, v9
901 ; CHECK-NEXT: vwadd.vv v8, v10, v11
903 %a = load <2 x i16>, ptr %x
904 %b = load <2 x i16>, ptr %y
905 %c = sext <2 x i16> %a to <2 x i64>
906 %d = sext <2 x i16> %b to <2 x i64>
907 %e = add <2 x i64> %c, %d