; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
; Both <2 x i8> operands are zero-extended to <2 x i16> before the subtract;
; the checks expect a single widening vwsubu.vv at e8/mf8.
define <2 x i16> @vwsubu_v2i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwsubu_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwsubu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = load <2 x i8>, ptr %y
  %c = zext <2 x i8> %a to <2 x i16>
  %d = zext <2 x i8> %b to <2 x i16>
  %e = sub <2 x i16> %c, %d
  ret <2 x i16> %e
}
; Both <4 x i8> operands are zero-extended to <4 x i16> before the subtract;
; the checks expect a single widening vwsubu.vv at e8/mf4.
define <4 x i16> @vwsubu_v4i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwsubu_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwsubu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %x
  %b = load <4 x i8>, ptr %y
  %c = zext <4 x i8> %a to <4 x i16>
  %d = zext <4 x i8> %b to <4 x i16>
  %e = sub <4 x i16> %c, %d
  ret <4 x i16> %e
}
; Both <2 x i16> operands are zero-extended to <2 x i32> before the subtract;
; the checks expect a single widening vwsubu.vv at e16/mf4.
define <2 x i32> @vwsubu_v2i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwsubu_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vwsubu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x i16>, ptr %x
  %b = load <2 x i16>, ptr %y
  %c = zext <2 x i16> %a to <2 x i32>
  %d = zext <2 x i16> %b to <2 x i32>
  %e = sub <2 x i32> %c, %d
  ret <2 x i32> %e
}
; Both <8 x i8> operands are zero-extended to <8 x i16> before the subtract;
; the checks expect a single widening vwsubu.vv at e8/mf2.
define <8 x i16> @vwsubu_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwsubu_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwsubu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = load <8 x i8>, ptr %y
  %c = zext <8 x i8> %a to <8 x i16>
  %d = zext <8 x i8> %b to <8 x i16>
  %e = sub <8 x i16> %c, %d
  ret <8 x i16> %e
}
; Both <4 x i16> operands are zero-extended to <4 x i32> before the subtract;
; the checks expect a single widening vwsubu.vv at e16/mf2.
define <4 x i32> @vwsubu_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwsubu_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vwsubu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load <4 x i16>, ptr %y
  %c = zext <4 x i16> %a to <4 x i32>
  %d = zext <4 x i16> %b to <4 x i32>
  %e = sub <4 x i32> %c, %d
  ret <4 x i32> %e
}
; Both <2 x i32> operands are zero-extended to <2 x i64> before the subtract;
; the checks expect a single widening vwsubu.vv at e32/mf2.
define <2 x i64> @vwsubu_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwsubu_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vle32.v v10, (a1)
; CHECK-NEXT:    vwsubu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load <2 x i32>, ptr %y
  %c = zext <2 x i32> %a to <2 x i64>
  %d = zext <2 x i32> %b to <2 x i64>
  %e = sub <2 x i64> %c, %d
  ret <2 x i64> %e
}
; Both <16 x i8> operands are zero-extended to <16 x i16> before the subtract;
; the checks expect a single widening vwsubu.vv at e8/m1.
define <16 x i16> @vwsubu_v16i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwsubu_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v10, (a0)
; CHECK-NEXT:    vle8.v v11, (a1)
; CHECK-NEXT:    vwsubu.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = zext <16 x i8> %a to <16 x i16>
  %d = zext <16 x i8> %b to <16 x i16>
  %e = sub <16 x i16> %c, %d
  ret <16 x i16> %e
}
; Both <8 x i16> operands are zero-extended to <8 x i32> before the subtract;
; the checks expect a single widening vwsubu.vv at e16/m1.
define <8 x i32> @vwsubu_v8i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwsubu_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vle16.v v11, (a1)
; CHECK-NEXT:    vwsubu.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = zext <8 x i16> %a to <8 x i32>
  %d = zext <8 x i16> %b to <8 x i32>
  %e = sub <8 x i32> %c, %d
  ret <8 x i32> %e
}
; Both <4 x i32> operands are zero-extended to <4 x i64> before the subtract;
; the checks expect a single widening vwsubu.vv at e32/m1.
define <4 x i64> @vwsubu_v4i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwsubu_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vle32.v v11, (a1)
; CHECK-NEXT:    vwsubu.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = zext <4 x i32> %a to <4 x i64>
  %d = zext <4 x i32> %b to <4 x i64>
  %e = sub <4 x i64> %c, %d
  ret <4 x i64> %e
}
; Both <32 x i8> operands are zero-extended to <32 x i16> before the subtract;
; the checks expect VL=32 set via li + vsetvli and one vwsubu.vv at e8/m2.
define <32 x i16> @vwsubu_v32i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwsubu_v32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; CHECK-NEXT:    vle8.v v12, (a0)
; CHECK-NEXT:    vle8.v v14, (a1)
; CHECK-NEXT:    vwsubu.vv v8, v12, v14
; CHECK-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = zext <32 x i8> %a to <32 x i16>
  %d = zext <32 x i8> %b to <32 x i16>
  %e = sub <32 x i16> %c, %d
  ret <32 x i16> %e
}
; Both <16 x i16> operands are zero-extended to <16 x i32> before the subtract;
; the checks expect a single widening vwsubu.vv at e16/m2.
define <16 x i32> @vwsubu_v16i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwsubu_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vle16.v v14, (a1)
; CHECK-NEXT:    vwsubu.vv v8, v12, v14
; CHECK-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = zext <16 x i16> %a to <16 x i32>
  %d = zext <16 x i16> %b to <16 x i32>
  %e = sub <16 x i32> %c, %d
  ret <16 x i32> %e
}
; Both <8 x i32> operands are zero-extended to <8 x i64> before the subtract;
; the checks expect a single widening vwsubu.vv at e32/m2.
define <8 x i64> @vwsubu_v8i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwsubu_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vle32.v v14, (a1)
; CHECK-NEXT:    vwsubu.vv v8, v12, v14
; CHECK-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = zext <8 x i32> %a to <8 x i64>
  %d = zext <8 x i32> %b to <8 x i64>
  %e = sub <8 x i64> %c, %d
  ret <8 x i64> %e
}
; Both <64 x i8> operands are zero-extended to <64 x i16> before the subtract;
; the checks expect VL=64 set via li + vsetvli and one vwsubu.vv at e8/m4.
define <64 x i16> @vwsubu_v64i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwsubu_v64i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
; CHECK-NEXT:    vle8.v v16, (a0)
; CHECK-NEXT:    vle8.v v20, (a1)
; CHECK-NEXT:    vwsubu.vv v8, v16, v20
; CHECK-NEXT:    ret
  %a = load <64 x i8>, ptr %x
  %b = load <64 x i8>, ptr %y
  %c = zext <64 x i8> %a to <64 x i16>
  %d = zext <64 x i8> %b to <64 x i16>
  %e = sub <64 x i16> %c, %d
  ret <64 x i16> %e
}
; Both <32 x i16> operands are zero-extended to <32 x i32> before the subtract;
; the checks expect VL=32 set via li + vsetvli and one vwsubu.vv at e16/m4.
define <32 x i32> @vwsubu_v32i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwsubu_v32i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vle16.v v20, (a1)
; CHECK-NEXT:    vwsubu.vv v8, v16, v20
; CHECK-NEXT:    ret
  %a = load <32 x i16>, ptr %x
  %b = load <32 x i16>, ptr %y
  %c = zext <32 x i16> %a to <32 x i32>
  %d = zext <32 x i16> %b to <32 x i32>
  %e = sub <32 x i32> %c, %d
  ret <32 x i32> %e
}
; Both <16 x i32> operands are zero-extended to <16 x i64> before the subtract;
; the checks expect a single widening vwsubu.vv at e32/m4.
define <16 x i64> @vwsubu_v16i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwsubu_v16i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vle32.v v20, (a1)
; CHECK-NEXT:    vwsubu.vv v8, v16, v20
; CHECK-NEXT:    ret
  %a = load <16 x i32>, ptr %x
  %b = load <16 x i32>, ptr %y
  %c = zext <16 x i32> %a to <16 x i64>
  %d = zext <16 x i32> %b to <16 x i64>
  %e = sub <16 x i64> %c, %d
  ret <16 x i64> %e
}
; <128 x i8> operands widened to <128 x i16>. The checks expect the work to be
; done as two vwsubu.vv at e8/m4: the upper halves are extracted with
; vslidedown by 64, and whole-register spills/reloads (vs8r.v/vl8r.v) are used
; around the two widening subtracts.
define <128 x i16> @vwsubu_v128i16(ptr %x, ptr %y) nounwind {
; CHECK-LABEL: vwsubu_v128i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    li a2, 128
; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle8.v v0, (a1)
; CHECK-NEXT:    li a0, 64
; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v16, v8, a0
; CHECK-NEXT:    vslidedown.vx v8, v0, a0
; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT:    vmv4r.v v24, v8
; CHECK-NEXT:    vwsubu.vv v8, v16, v24
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vwsubu.vv v8, v16, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    ret
  %a = load <128 x i8>, ptr %x
  %b = load <128 x i8>, ptr %y
  %c = zext <128 x i8> %a to <128 x i16>
  %d = zext <128 x i8> %b to <128 x i16>
  %e = sub <128 x i16> %c, %d
  ret <128 x i16> %e
}
; <64 x i16> operands widened to <64 x i32>. The checks expect the work to be
; done as two vwsubu.vv at e16/m4: the upper halves are extracted with
; vslidedown by 32, and whole-register spills/reloads (vs8r.v/vl8r.v) are used
; around the two widening subtracts.
define <64 x i32> @vwsubu_v64i32(ptr %x, ptr %y) nounwind {
; CHECK-LABEL: vwsubu_v64i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle16.v v0, (a1)
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v16, v8, a0
; CHECK-NEXT:    vslidedown.vx v8, v0, a0
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vmv4r.v v24, v8
; CHECK-NEXT:    vwsubu.vv v8, v16, v24
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vwsubu.vv v8, v16, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    ret
  %a = load <64 x i16>, ptr %x
  %b = load <64 x i16>, ptr %y
  %c = zext <64 x i16> %a to <64 x i32>
  %d = zext <64 x i16> %b to <64 x i32>
  %e = sub <64 x i32> %c, %d
  ret <64 x i32> %e
}
; <32 x i32> operands widened to <32 x i64>. The checks expect the work to be
; done as two vwsubu.vv at e32/m4: the upper halves are extracted with
; vslidedown.vi by 16, and whole-register spills/reloads (vs8r.v/vl8r.v) are
; used around the two widening subtracts.
define <32 x i64> @vwsubu_v32i64(ptr %x, ptr %y) nounwind {
; CHECK-LABEL: vwsubu_v32i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle32.v v0, (a1)
; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT:    vslidedown.vi v16, v8, 16
; CHECK-NEXT:    vslidedown.vi v8, v0, 16
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vmv4r.v v24, v8
; CHECK-NEXT:    vwsubu.vv v8, v16, v24
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vwsubu.vv v8, v16, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    ret
  %a = load <32 x i32>, ptr %x
  %b = load <32 x i32>, ptr %y
  %c = zext <32 x i32> %a to <32 x i64>
  %d = zext <32 x i32> %b to <32 x i64>
  %e = sub <32 x i64> %c, %d
  ret <32 x i64> %e
}
; Both operands go from i8 straight to i32 (a 4x extension). The checks expect
; each to be extended once with vzext.vf2 at e16, followed by vwsubu.vv.
define <2 x i32> @vwsubu_v2i32_v2i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwsubu_v2i32_v2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vzext.vf2 v10, v8
; CHECK-NEXT:    vzext.vf2 v11, v9
; CHECK-NEXT:    vwsubu.vv v8, v11, v10
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = load <2 x i8>, ptr %y
  %c = zext <2 x i8> %a to <2 x i32>
  %d = zext <2 x i8> %b to <2 x i32>
  %e = sub <2 x i32> %c, %d
  ret <2 x i32> %e
}
; Mixed source widths: %a is <4 x i8>, %b is <4 x i16>, both extended to i32.
; The checks expect only the i8 operand to get a vzext.vf2 before vwsubu.vv.
define <4 x i32> @vwsubu_v4i32_v4i8_v4i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwsubu_v4i32_v4i8_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vzext.vf2 v10, v8
; CHECK-NEXT:    vwsubu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %x
  %b = load <4 x i16>, ptr %y
  %c = zext <4 x i8> %a to <4 x i32>
  %d = zext <4 x i16> %b to <4 x i32>
  %e = sub <4 x i32> %c, %d
  ret <4 x i32> %e
}
; Mixed source widths: %a is <4 x i32>, %b is <4 x i8>, both extended to i64.
; The checks expect the i8 operand to be brought to e32 with vzext.vf4 before
; the widening vwsubu.vv.
define <4 x i64> @vwsubu_v4i64_v4i32_v4i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwsubu_v4i64_v4i32_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vzext.vf4 v11, v8
; CHECK-NEXT:    vwsubu.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i8>, ptr %y
  %c = zext <4 x i32> %a to <4 x i64>
  %d = zext <4 x i8> %b to <4 x i64>
  %e = sub <4 x i64> %c, %d
  ret <4 x i64> %e
}
; The i8 scalar %y is splatted (insertelement + shufflevector) and both the
; vector and the splat are zero-extended; the checks expect vwsubu.vx.
define <2 x i16> @vwsubu_vx_v2i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwsubu_vx_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwsubu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = insertelement <2 x i8> poison, i8 %y, i32 0
  %c = shufflevector <2 x i8> %b, <2 x i8> poison, <2 x i32> zeroinitializer
  %d = zext <2 x i8> %a to <2 x i16>
  %e = zext <2 x i8> %c to <2 x i16>
  %f = sub <2 x i16> %d, %e
  ret <2 x i16> %f
}
; The i8 scalar %y is splatted (insertelement + shufflevector) and both the
; vector and the splat are zero-extended; the checks expect vwsubu.vx.
define <4 x i16> @vwsubu_vx_v4i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwsubu_vx_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwsubu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %x
  %b = insertelement <4 x i8> poison, i8 %y, i32 0
  %c = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer
  %d = zext <4 x i8> %a to <4 x i16>
  %e = zext <4 x i8> %c to <4 x i16>
  %f = sub <4 x i16> %d, %e
  ret <4 x i16> %f
}
; The i16 scalar %y is splatted (insertelement + shufflevector) and both the
; vector and the splat are zero-extended; the checks expect vwsubu.vx.
define <2 x i32> @vwsubu_vx_v2i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwsubu_vx_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vwsubu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <2 x i16>, ptr %x
  %b = insertelement <2 x i16> poison, i16 %y, i32 0
  %c = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32> zeroinitializer
  %d = zext <2 x i16> %a to <2 x i32>
  %e = zext <2 x i16> %c to <2 x i32>
  %f = sub <2 x i32> %d, %e
  ret <2 x i32> %f
}
; The i8 scalar %y is splatted (insertelement + shufflevector) and both the
; vector and the splat are zero-extended; the checks expect vwsubu.vx.
define <8 x i16> @vwsubu_vx_v8i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwsubu_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwsubu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = insertelement <8 x i8> poison, i8 %y, i32 0
  %c = shufflevector <8 x i8> %b, <8 x i8> poison, <8 x i32> zeroinitializer
  %d = zext <8 x i8> %a to <8 x i16>
  %e = zext <8 x i8> %c to <8 x i16>
  %f = sub <8 x i16> %d, %e
  ret <8 x i16> %f
}
; The i16 scalar %y is splatted (insertelement + shufflevector) and both the
; vector and the splat are zero-extended; the checks expect vwsubu.vx.
define <4 x i32> @vwsubu_vx_v4i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwsubu_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vwsubu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = insertelement <4 x i16> poison, i16 %y, i32 0
  %c = shufflevector <4 x i16> %b, <4 x i16> poison, <4 x i32> zeroinitializer
  %d = zext <4 x i16> %a to <4 x i32>
  %e = zext <4 x i16> %c to <4 x i32>
  %f = sub <4 x i32> %d, %e
  ret <4 x i32> %f
}
; The i32 scalar %y is splatted (insertelement + shufflevector) and both the
; vector and the splat are zero-extended; the checks expect vwsubu.vx.
define <2 x i64> @vwsubu_vx_v2i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwsubu_vx_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vwsubu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = insertelement <2 x i32> poison, i32 %y, i64 0
  %c = shufflevector <2 x i32> %b, <2 x i32> poison, <2 x i32> zeroinitializer
  %d = zext <2 x i32> %a to <2 x i64>
  %e = zext <2 x i32> %c to <2 x i64>
  %f = sub <2 x i64> %d, %e
  ret <2 x i64> %f
}
; The i8 scalar %y is splatted (insertelement + shufflevector) and both the
; vector and the splat are zero-extended; the checks expect vwsubu.vx.
define <16 x i16> @vwsubu_vx_v16i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwsubu_vx_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v10, (a0)
; CHECK-NEXT:    vwsubu.vx v8, v10, a1
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = zext <16 x i8> %a to <16 x i16>
  %e = zext <16 x i8> %c to <16 x i16>
  %f = sub <16 x i16> %d, %e
  ret <16 x i16> %f
}
; The i16 scalar %y is splatted (insertelement + shufflevector) and both the
; vector and the splat are zero-extended; the checks expect vwsubu.vx.
define <8 x i32> @vwsubu_vx_v8i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwsubu_vx_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vwsubu.vx v8, v10, a1
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = zext <8 x i16> %a to <8 x i32>
  %e = zext <8 x i16> %c to <8 x i32>
  %f = sub <8 x i32> %d, %e
  ret <8 x i32> %f
}
; The i32 scalar %y is splatted (insertelement + shufflevector) and both the
; vector and the splat are zero-extended; the checks expect vwsubu.vx.
define <4 x i64> @vwsubu_vx_v4i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwsubu_vx_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vwsubu.vx v8, v10, a1
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i64 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = zext <4 x i32> %a to <4 x i64>
  %e = zext <4 x i32> %c to <4 x i64>
  %f = sub <4 x i64> %d, %e
  ret <4 x i64> %f
}
; The i8 scalar %y is splatted and both operands are zero-extended; the checks
; expect VL=32 set via li + vsetvli and a single vwsubu.vx at e8/m2.
define <32 x i16> @vwsubu_vx_v32i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwsubu_vx_v32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; CHECK-NEXT:    vle8.v v12, (a0)
; CHECK-NEXT:    vwsubu.vx v8, v12, a1
; CHECK-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = insertelement <32 x i8> poison, i8 %y, i32 0
  %c = shufflevector <32 x i8> %b, <32 x i8> poison, <32 x i32> zeroinitializer
  %d = zext <32 x i8> %a to <32 x i16>
  %e = zext <32 x i8> %c to <32 x i16>
  %f = sub <32 x i16> %d, %e
  ret <32 x i16> %f
}
; The i16 scalar %y is splatted (insertelement + shufflevector) and both the
; vector and the splat are zero-extended; the checks expect vwsubu.vx.
define <16 x i32> @vwsubu_vx_v16i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwsubu_vx_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vwsubu.vx v8, v12, a1
; CHECK-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = insertelement <16 x i16> poison, i16 %y, i32 0
  %c = shufflevector <16 x i16> %b, <16 x i16> poison, <16 x i32> zeroinitializer
  %d = zext <16 x i16> %a to <16 x i32>
  %e = zext <16 x i16> %c to <16 x i32>
  %f = sub <16 x i32> %d, %e
  ret <16 x i32> %f
}
; The i32 scalar %y is splatted (insertelement + shufflevector) and both the
; vector and the splat are zero-extended; the checks expect vwsubu.vx.
define <8 x i64> @vwsubu_vx_v8i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwsubu_vx_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vwsubu.vx v8, v12, a1
; CHECK-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = insertelement <8 x i32> poison, i32 %y, i64 0
  %c = shufflevector <8 x i32> %b, <8 x i32> poison, <8 x i32> zeroinitializer
  %d = zext <8 x i32> %a to <8 x i64>
  %e = zext <8 x i32> %c to <8 x i64>
  %f = sub <8 x i64> %d, %e
  ret <8 x i64> %f
}
; The i8 scalar %y is splatted and both operands are zero-extended; the checks
; expect VL=64 set via li + vsetvli and a single vwsubu.vx at e8/m4.
define <64 x i16> @vwsubu_vx_v64i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwsubu_vx_v64i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
; CHECK-NEXT:    vle8.v v16, (a0)
; CHECK-NEXT:    vwsubu.vx v8, v16, a1
; CHECK-NEXT:    ret
  %a = load <64 x i8>, ptr %x
  %b = insertelement <64 x i8> poison, i8 %y, i32 0
  %c = shufflevector <64 x i8> %b, <64 x i8> poison, <64 x i32> zeroinitializer
  %d = zext <64 x i8> %a to <64 x i16>
  %e = zext <64 x i8> %c to <64 x i16>
  %f = sub <64 x i16> %d, %e
  ret <64 x i16> %f
}
; The i16 scalar %y is splatted and both operands are zero-extended; the checks
; expect VL=32 set via li + vsetvli and a single vwsubu.vx at e16/m4.
define <32 x i32> @vwsubu_vx_v32i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwsubu_vx_v32i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vwsubu.vx v8, v16, a1
; CHECK-NEXT:    ret
  %a = load <32 x i16>, ptr %x
  %b = insertelement <32 x i16> poison, i16 %y, i32 0
  %c = shufflevector <32 x i16> %b, <32 x i16> poison, <32 x i32> zeroinitializer
  %d = zext <32 x i16> %a to <32 x i32>
  %e = zext <32 x i16> %c to <32 x i32>
  %f = sub <32 x i32> %d, %e
  ret <32 x i32> %f
}
; The i32 scalar %y is splatted (insertelement + shufflevector) and both the
; vector and the splat are zero-extended; the checks expect vwsubu.vx.
define <16 x i64> @vwsubu_vx_v16i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwsubu_vx_v16i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vwsubu.vx v8, v16, a1
; CHECK-NEXT:    ret
  %a = load <16 x i32>, ptr %x
  %b = insertelement <16 x i32> poison, i32 %y, i64 0
  %c = shufflevector <16 x i32> %b, <16 x i32> poison, <16 x i32> zeroinitializer
  %d = zext <16 x i32> %a to <16 x i64>
  %e = zext <16 x i32> %c to <16 x i64>
  %f = sub <16 x i64> %d, %e
  ret <16 x i64> %f
}
; An i8 scalar is loaded from %y, zero-extended to i16 and splatted as the
; minuend (splat - zext(vector)). The checks expect a zero-stride vlse8.v for
; the splat followed by vwsubu.vv.
define <8 x i16> @vwsubu_vx_v8i16_i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwsubu_vx_v8i16_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vlse8.v v10, (a1), zero
; CHECK-NEXT:    vwsubu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = load i8, ptr %y
  %c = zext i8 %b to i16
  %d = insertelement <8 x i16> poison, i16 %c, i32 0
  %e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
  %f = zext <8 x i8> %a to <8 x i16>
  %g = sub <8 x i16> %e, %f
  ret <8 x i16> %g
}
; The scalar loaded from %y is already i16 (the result element type), so the
; splat minuend is wide. The checks expect vlse16.v for the splat and the
; wide-operand form vwsubu.wv.
define <8 x i16> @vwsubu_vx_v8i16_i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwsubu_vx_v8i16_i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vlse16.v v8, (a1), zero
; CHECK-NEXT:    vwsubu.wv v8, v8, v9
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = load i16, ptr %y
  %d = insertelement <8 x i16> poison, i16 %b, i32 0
  %e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
  %f = zext <8 x i8> %a to <8 x i16>
  %g = sub <8 x i16> %e, %f
  ret <8 x i16> %g
}
; An i8 scalar is loaded from %y, zero-extended to i32 and splatted as the
; minuend; the vector operand is <4 x i16>. The checks expect a scalar lbu,
; a vmv.v.x splat at e16 and then vwsubu.vv.
define <4 x i32> @vwsubu_vx_v4i32_i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwsubu_vx_v4i32_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    lbu a1, 0(a1)
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vmv.v.x v10, a1
; CHECK-NEXT:    vwsubu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load i8, ptr %y
  %c = zext i8 %b to i32
  %d = insertelement <4 x i32> poison, i32 %c, i32 0
  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
  %f = zext <4 x i16> %a to <4 x i32>
  %g = sub <4 x i32> %e, %f
  ret <4 x i32> %g
}
; An i16 scalar is loaded from %y, zero-extended to i32 and splatted as the
; minuend; the vector operand is <4 x i16>. The checks expect a zero-stride
; vlse16.v for the splat followed by vwsubu.vv.
define <4 x i32> @vwsubu_vx_v4i32_i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwsubu_vx_v4i32_i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vlse16.v v10, (a1), zero
; CHECK-NEXT:    vwsubu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load i16, ptr %y
  %c = zext i16 %b to i32
  %d = insertelement <4 x i32> poison, i32 %c, i32 0
  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
  %f = zext <4 x i16> %a to <4 x i32>
  %g = sub <4 x i32> %e, %f
  ret <4 x i32> %g
}
; The scalar loaded from %y is already i32 (the result element type), so the
; splat minuend is wide. The checks expect vlse32.v for the splat and the
; wide-operand form vwsubu.wv.
define <4 x i32> @vwsubu_vx_v4i32_i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwsubu_vx_v4i32_i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vlse32.v v8, (a1), zero
; CHECK-NEXT:    vwsubu.wv v8, v8, v9
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load i32, ptr %y
  %d = insertelement <4 x i32> poison, i32 %b, i32 0
  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
  %f = zext <4 x i16> %a to <4 x i32>
  %g = sub <4 x i32> %e, %f
  ret <4 x i32> %g
}
; An i8 scalar is loaded from %y, zero-extended to i64 and splatted as the
; minuend; the vector operand is <2 x i32>. Expected code differs per target:
; RV32 assembles the 64-bit value on the stack (high word zero), splats it
; with vlse64.v and uses vwsubu.wv; RV64 uses lbu + vmv.v.x and vwsubu.vv.
define <2 x i64> @vwsubu_vx_v2i64_i8(ptr %x, ptr %y) nounwind {
; RV32-LABEL: vwsubu_vx_v2i64_i8:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    lbu a1, 0(a1)
; RV32-NEXT:    vle32.v v9, (a0)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v8, (a0), zero
; RV32-NEXT:    vwsubu.wv v8, v8, v9
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vwsubu_vx_v2i64_i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    lbu a1, 0(a1)
; RV64-NEXT:    vle32.v v9, (a0)
; RV64-NEXT:    vmv.v.x v10, a1
; RV64-NEXT:    vwsubu.vv v8, v10, v9
; RV64-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i8, ptr %y
  %c = zext i8 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = zext <2 x i32> %a to <2 x i64>
  %g = sub <2 x i64> %e, %f
  ret <2 x i64> %g
}
; An i16 scalar is loaded from %y, zero-extended to i64 and splatted as the
; minuend; the vector operand is <2 x i32>. Expected code differs per target:
; RV32 assembles the 64-bit value on the stack (high word zero), splats it
; with vlse64.v and uses vwsubu.wv; RV64 uses lhu + vmv.v.x and vwsubu.vv.
define <2 x i64> @vwsubu_vx_v2i64_i16(ptr %x, ptr %y) nounwind {
; RV32-LABEL: vwsubu_vx_v2i64_i16:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    lhu a1, 0(a1)
; RV32-NEXT:    vle32.v v9, (a0)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v8, (a0), zero
; RV32-NEXT:    vwsubu.wv v8, v8, v9
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vwsubu_vx_v2i64_i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    lhu a1, 0(a1)
; RV64-NEXT:    vle32.v v9, (a0)
; RV64-NEXT:    vmv.v.x v10, a1
; RV64-NEXT:    vwsubu.vv v8, v10, v9
; RV64-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i16, ptr %y
  %c = zext i16 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = zext <2 x i32> %a to <2 x i64>
  %g = sub <2 x i64> %e, %f
  ret <2 x i64> %g
}
; An i32 scalar is loaded from %y, zero-extended to i64 and splatted as the
; minuend; the vector operand is <2 x i32>. Expected code differs per target:
; RV32 assembles the 64-bit value on the stack (high word zero), splats it
; with vlse64.v and uses vwsubu.wv; RV64 splats with a zero-stride vlse32.v
; and uses vwsubu.vv.
define <2 x i64> @vwsubu_vx_v2i64_i32(ptr %x, ptr %y) nounwind {
; RV32-LABEL: vwsubu_vx_v2i64_i32:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    lw a1, 0(a1)
; RV32-NEXT:    vle32.v v9, (a0)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v8, (a0), zero
; RV32-NEXT:    vwsubu.wv v8, v8, v9
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vwsubu_vx_v2i64_i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vle32.v v9, (a0)
; RV64-NEXT:    vlse32.v v10, (a1), zero
; RV64-NEXT:    vwsubu.vv v8, v10, v9
; RV64-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i32, ptr %y
  %c = zext i32 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = zext <2 x i32> %a to <2 x i64>
  %g = sub <2 x i64> %e, %f
  ret <2 x i64> %g
}
; The scalar loaded from %y is already i64 (the result element type), so the
; splat minuend is wide and both targets are expected to use vwsubu.wv. RV32
; copies the two 32-bit halves to the stack before the vlse64.v splat; RV64
; splats directly from %y with vlse64.v.
define <2 x i64> @vwsubu_vx_v2i64_i64(ptr %x, ptr %y) nounwind {
; RV32-LABEL: vwsubu_vx_v2i64_i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    lw a2, 4(a1)
; RV32-NEXT:    lw a1, 0(a1)
; RV32-NEXT:    vle32.v v9, (a0)
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v8, (a0), zero
; RV32-NEXT:    vwsubu.wv v8, v8, v9
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vwsubu_vx_v2i64_i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vle32.v v9, (a0)
; RV64-NEXT:    vlse64.v v8, (a1), zero
; RV64-NEXT:    vwsubu.wv v8, v8, v9
; RV64-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i64, ptr %y
  %d = insertelement <2 x i64> poison, i64 %b, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = zext <2 x i32> %a to <2 x i64>
  %g = sub <2 x i64> %e, %f
  ret <2 x i64> %g
}