; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX2,LMULMAX2-RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX2,LMULMAX2-RV64
; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX1,LMULMAX1-RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX1,LMULMAX1-RV64

define void @add_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: add_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = add <16 x i8> %a, %b
  store <16 x i8> %c, ptr %x
  ret void
}

define void @add_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: add_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = add <8 x i16> %a, %b
  store <8 x i16> %c, ptr %x
  ret void
}

define void @add_v6i16(ptr %x, ptr %y) {
; CHECK-LABEL: add_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = load <6 x i16>, ptr %y
  %c = add <6 x i16> %a, %b
  store <6 x i16> %c, ptr %x
  ret void
}

define void @add_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: add_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = add <4 x i32> %a, %b
  store <4 x i32> %c, ptr %x
  ret void
}

define void @add_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: add_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = add <2 x i64> %a, %b
  store <2 x i64> %c, ptr %x
  ret void
}

define void @sub_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: sub_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = sub <16 x i8> %a, %b
  store <16 x i8> %c, ptr %x
  ret void
}

define void @sub_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: sub_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = sub <8 x i16> %a, %b
  store <8 x i16> %c, ptr %x
  ret void
}

define void @sub_v6i16(ptr %x, ptr %y) {
; CHECK-LABEL: sub_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = load <6 x i16>, ptr %y
  %c = sub <6 x i16> %a, %b
  store <6 x i16> %c, ptr %x
  ret void
}

define void @sub_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: sub_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = sub <4 x i32> %a, %b
  store <4 x i32> %c, ptr %x
  ret void
}

define void @sub_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: sub_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = sub <2 x i64> %a, %b
  store <2 x i64> %c, ptr %x
  ret void
}

define void @mul_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: mul_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vmul.vv v8, v8, v9
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = mul <16 x i8> %a, %b
  store <16 x i8> %c, ptr %x
  ret void
}

define void @mul_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: mul_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vmul.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = mul <8 x i16> %a, %b
  store <8 x i16> %c, ptr %x
  ret void
}

define void @mul_v6i16(ptr %x, ptr %y) {
; CHECK-LABEL: mul_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmul.vv v8, v8, v9
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = load <6 x i16>, ptr %y
  %c = mul <6 x i16> %a, %b
  store <6 x i16> %c, ptr %x
  ret void
}

define void @mul_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: mul_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vmul.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = mul <4 x i32> %a, %b
  store <4 x i32> %c, ptr %x
  ret void
}

define void @mul_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: mul_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vmul.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = mul <2 x i64> %a, %b
  store <2 x i64> %c, ptr %x
  ret void
}

define void @and_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: and_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vand.vv v8, v8, v9
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = and <16 x i8> %a, %b
  store <16 x i8> %c, ptr %x
  ret void
}

define void @and_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: and_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vand.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = and <8 x i16> %a, %b
  store <8 x i16> %c, ptr %x
  ret void
}

define void @and_v6i16(ptr %x, ptr %y) {
; CHECK-LABEL: and_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vand.vv v8, v8, v9
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = load <6 x i16>, ptr %y
  %c = and <6 x i16> %a, %b
  store <6 x i16> %c, ptr %x
  ret void
}

define void @and_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: and_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vand.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = and <4 x i32> %a, %b
  store <4 x i32> %c, ptr %x
  ret void
}

define void @and_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: and_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vand.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = and <2 x i64> %a, %b
  store <2 x i64> %c, ptr %x
  ret void
}

define void @or_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: or_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = or <16 x i8> %a, %b
  store <16 x i8> %c, ptr %x
  ret void
}

define void @or_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: or_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = or <8 x i16> %a, %b
  store <8 x i16> %c, ptr %x
  ret void
}

define void @or_v6i16(ptr %x, ptr %y) {
; CHECK-LABEL: or_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = load <6 x i16>, ptr %y
  %c = or <6 x i16> %a, %b
  store <6 x i16> %c, ptr %x
  ret void
}

define void @or_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: or_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = or <4 x i32> %a, %b
  store <4 x i32> %c, ptr %x
  ret void
}

define void @or_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: or_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = or <2 x i64> %a, %b
  store <2 x i64> %c, ptr %x
  ret void
}

define void @xor_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: xor_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vxor.vv v8, v8, v9
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = xor <16 x i8> %a, %b
  store <16 x i8> %c, ptr %x
  ret void
}

define void @xor_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: xor_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vxor.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = xor <8 x i16> %a, %b
  store <8 x i16> %c, ptr %x
  ret void
}

define void @xor_v6i16(ptr %x, ptr %y) {
; CHECK-LABEL: xor_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vxor.vv v8, v8, v9
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = load <6 x i16>, ptr %y
  %c = xor <6 x i16> %a, %b
  store <6 x i16> %c, ptr %x
  ret void
}

define void @xor_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: xor_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vxor.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = xor <4 x i32> %a, %b
  store <4 x i32> %c, ptr %x
  ret void
}

define void @xor_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: xor_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vxor.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = xor <2 x i64> %a, %b
  store <2 x i64> %c, ptr %x
  ret void
}

define void @lshr_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: lshr_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vsrl.vv v8, v8, v9
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = lshr <16 x i8> %a, %b
  store <16 x i8> %c, ptr %x
  ret void
}

define void @lshr_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: lshr_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsrl.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = lshr <8 x i16> %a, %b
  store <8 x i16> %c, ptr %x
  ret void
}

define void @lshr_v6i16(ptr %x, ptr %y) {
; CHECK-LABEL: lshr_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vsrl.vv v8, v8, v9
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = load <6 x i16>, ptr %y
  %c = lshr <6 x i16> %a, %b
  store <6 x i16> %c, ptr %x
  ret void
}

define void @lshr_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: lshr_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vsrl.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = lshr <4 x i32> %a, %b
  store <4 x i32> %c, ptr %x
  ret void
}

define void @lshr_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: lshr_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vsrl.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = lshr <2 x i64> %a, %b
  store <2 x i64> %c, ptr %x
  ret void
}

define void @ashr_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: ashr_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vsra.vv v8, v8, v9
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = ashr <16 x i8> %a, %b
  store <16 x i8> %c, ptr %x
  ret void
}

define void @ashr_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: ashr_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsra.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = ashr <8 x i16> %a, %b
  store <8 x i16> %c, ptr %x
  ret void
}

define void @ashr_v6i16(ptr %x, ptr %y) {
; CHECK-LABEL: ashr_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vsra.vv v8, v8, v9
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = load <6 x i16>, ptr %y
  %c = ashr <6 x i16> %a, %b
  store <6 x i16> %c, ptr %x
  ret void
}

define void @ashr_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: ashr_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vsra.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = ashr <4 x i32> %a, %b
  store <4 x i32> %c, ptr %x
  ret void
}

define void @ashr_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: ashr_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vsra.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = ashr <2 x i64> %a, %b
  store <2 x i64> %c, ptr %x
  ret void
}

define void @shl_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: shl_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vsll.vv v8, v8, v9
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = shl <16 x i8> %a, %b
  store <16 x i8> %c, ptr %x
  ret void
}

define void @shl_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: shl_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsll.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = shl <8 x i16> %a, %b
  store <8 x i16> %c, ptr %x
  ret void
}

define void @shl_v6i16(ptr %x, ptr %y) {
; CHECK-LABEL: shl_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vsll.vv v8, v8, v9
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = load <6 x i16>, ptr %y
  %c = shl <6 x i16> %a, %b
  store <6 x i16> %c, ptr %x
  ret void
}

define void @shl_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: shl_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vsll.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = shl <4 x i32> %a, %b
  store <4 x i32> %c, ptr %x
  ret void
}

define void @shl_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: shl_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vsll.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = shl <2 x i64> %a, %b
  store <2 x i64> %c, ptr %x
  ret void
}

define void @sdiv_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: sdiv_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vdiv.vv v8, v8, v9
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = sdiv <16 x i8> %a, %b
  store <16 x i8> %c, ptr %x
  ret void
}

define void @sdiv_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: sdiv_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vdiv.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = sdiv <8 x i16> %a, %b
  store <8 x i16> %c, ptr %x
  ret void
}

define void @sdiv_v6i16(ptr %x, ptr %y) {
; CHECK-LABEL: sdiv_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a1)
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v10, v8, 4
; CHECK-NEXT:    vslidedown.vi v11, v9, 4
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vdiv.vv v10, v11, v10
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vdiv.vv v8, v9, v8
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v10, 4
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = load <6 x i16>, ptr %y
  %c = sdiv <6 x i16> %a, %b
  store <6 x i16> %c, ptr %x
  ret void
}

define void @sdiv_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: sdiv_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vdiv.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = sdiv <4 x i32> %a, %b
  store <4 x i32> %c, ptr %x
  ret void
}

define void @sdiv_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: sdiv_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vdiv.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = sdiv <2 x i64> %a, %b
  store <2 x i64> %c, ptr %x
  ret void
}

define void @srem_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: srem_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vrem.vv v8, v8, v9
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = srem <16 x i8> %a, %b
  store <16 x i8> %c, ptr %x
  ret void
}

define void @srem_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: srem_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vrem.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = srem <8 x i16> %a, %b
  store <8 x i16> %c, ptr %x
  ret void
}

define void @srem_v6i16(ptr %x, ptr %y) {
; CHECK-LABEL: srem_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a1)
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v10, v8, 4
; CHECK-NEXT:    vslidedown.vi v11, v9, 4
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vrem.vv v10, v11, v10
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vrem.vv v8, v9, v8
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v10, 4
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = load <6 x i16>, ptr %y
  %c = srem <6 x i16> %a, %b
  store <6 x i16> %c, ptr %x
  ret void
}

define void @srem_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: srem_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vrem.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = srem <4 x i32> %a, %b
  store <4 x i32> %c, ptr %x
  ret void
}

define void @srem_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: srem_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vrem.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = srem <2 x i64> %a, %b
  store <2 x i64> %c, ptr %x
  ret void
}

define void @udiv_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: udiv_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vdivu.vv v8, v8, v9
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = udiv <16 x i8> %a, %b
  store <16 x i8> %c, ptr %x
  ret void
}

define void @udiv_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: udiv_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vdivu.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = udiv <8 x i16> %a, %b
  store <8 x i16> %c, ptr %x
  ret void
}

define void @udiv_v6i16(ptr %x, ptr %y) {
; CHECK-LABEL: udiv_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a1)
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v10, v8, 4
; CHECK-NEXT:    vslidedown.vi v11, v9, 4
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vdivu.vv v10, v11, v10
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vdivu.vv v8, v9, v8
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v10, 4
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = load <6 x i16>, ptr %y
  %c = udiv <6 x i16> %a, %b
  store <6 x i16> %c, ptr %x
  ret void
}

define void @udiv_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: udiv_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vdivu.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = udiv <4 x i32> %a, %b
  store <4 x i32> %c, ptr %x
  ret void
}

define void @udiv_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: udiv_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vdivu.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = udiv <2 x i64> %a, %b
  store <2 x i64> %c, ptr %x
  ret void
}

define void @urem_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: urem_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vremu.vv v8, v8, v9
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = urem <16 x i8> %a, %b
  store <16 x i8> %c, ptr %x
  ret void
}

define void @urem_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: urem_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vremu.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = urem <8 x i16> %a, %b
  store <8 x i16> %c, ptr %x
  ret void
}

define void @urem_v6i16(ptr %x, ptr %y) {
; CHECK-LABEL: urem_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a1)
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v10, v8, 4
; CHECK-NEXT:    vslidedown.vi v11, v9, 4
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vremu.vv v10, v11, v10
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vremu.vv v8, v9, v8
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v10, 4
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = load <6 x i16>, ptr %y
  %c = urem <6 x i16> %a, %b
  store <6 x i16> %c, ptr %x
  ret void
}

define void @urem_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: urem_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vremu.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = urem <4 x i32> %a, %b
  store <4 x i32> %c, ptr %x
  ret void
}

define void @urem_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: urem_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vremu.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = urem <2 x i64> %a, %b
  store <2 x i64> %c, ptr %x
  ret void
}

define void @mulhu_v16i8(ptr %x) {
; CHECK-LABEL: mulhu_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    lui a1, 3
; CHECK-NEXT:    addi a1, a1, -2044
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v9, 0
; CHECK-NEXT:    li a1, -128
; CHECK-NEXT:    vmerge.vxm v10, v9, a1, v0
; CHECK-NEXT:    lui a1, 1
; CHECK-NEXT:    addi a2, a1, 32
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v0, a2
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    lui a2, %hi(.LCPI65_0)
; CHECK-NEXT:    addi a2, a2, %lo(.LCPI65_0)
; CHECK-NEXT:    vle8.v v11, (a2)
; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
; CHECK-NEXT:    vsrl.vv v9, v8, v9
; CHECK-NEXT:    vmulhu.vv v9, v9, v11
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vmulhu.vv v8, v8, v10
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    li a2, 513
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v0, a2
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v9, 4
; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
; CHECK-NEXT:    addi a1, a1, 78
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vmerge.vim v9, v9, 3, v0
; CHECK-NEXT:    lui a1, 8
; CHECK-NEXT:    addi a1, a1, 304
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vmerge.vim v9, v9, 2, v0
; CHECK-NEXT:    vsrl.vv v8, v8, v9
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = udiv <16 x i8> %a, <i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25>
  store <16 x i8> %b, ptr %x
  ret void
}

define void @mulhu_v8i16(ptr %x) {
; CHECK-LABEL: mulhu_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vmv.v.i v9, 0
; CHECK-NEXT:    lui a1, 1048568
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, tu, ma
; CHECK-NEXT:    vmv.v.i v10, 0
; CHECK-NEXT:    vmv.s.x v10, a1
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v11, 1
; CHECK-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
; CHECK-NEXT:    vslideup.vi v9, v11, 6
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    lui a1, %hi(.LCPI66_0)
; CHECK-NEXT:    addi a1, a1, %lo(.LCPI66_0)
; CHECK-NEXT:    vle16.v v12, (a1)
; CHECK-NEXT:    vsrl.vv v9, v8, v9
; CHECK-NEXT:    vmulhu.vv v9, v9, v12
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vmulhu.vv v8, v8, v10
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    li a1, 33
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    vmv.v.i v9, 3
; CHECK-NEXT:    vmerge.vim v9, v9, 2, v0
; CHECK-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
; CHECK-NEXT:    vslideup.vi v9, v11, 6
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vsrl.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = udiv <8 x i16> %a, <i16 7, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  store <8 x i16> %b, ptr %x
  ret void
}

define void @mulhu_v6i16(ptr %x) {
; CHECK-LABEL: mulhu_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vadd.vi v9, v9, 12
; CHECK-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v10, v8, 4
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vdivu.vv v9, v10, v9
; CHECK-NEXT:    lui a1, 45217
; CHECK-NEXT:    addi a1, a1, -1785
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vmv.s.x v10, a1
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vsext.vf2 v11, v10
; CHECK-NEXT:    vdivu.vv v8, v8, v11
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 4
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = udiv <6 x i16> %a, <i16 7, i16 9, i16 10, i16 11, i16 12, i16 13>
  store <6 x i16> %b, ptr %x
  ret void
}

define void @mulhu_v4i32(ptr %x) {
; CHECK-LABEL: mulhu_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    lui a1, 524288
; CHECK-NEXT:    vmv.s.x v9, a1
; CHECK-NEXT:    vmv.v.i v10, 0
; CHECK-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
; CHECK-NEXT:    vslideup.vi v10, v9, 2
; CHECK-NEXT:    lui a1, %hi(.LCPI68_0)
; CHECK-NEXT:    addi a1, a1, %lo(.LCPI68_0)
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vmulhu.vv v9, v8, v9
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vmulhu.vv v8, v8, v10
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    lui a1, 4128
; CHECK-NEXT:    addi a1, a1, 514
; CHECK-NEXT:    vmv.s.x v9, a1
; CHECK-NEXT:    vsext.vf4 v10, v9
; CHECK-NEXT:    vsrl.vv v8, v8, v10
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = udiv <4 x i32> %a, <i32 5, i32 6, i32 7, i32 9>
  store <4 x i32> %b, ptr %x
  ret void
}

define void @mulhu_v2i64(ptr %x) {
; RV32-LABEL: mulhu_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    lui a1, %hi(.LCPI69_0)
; RV32-NEXT:    addi a1, a1, %lo(.LCPI69_0)
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vle32.v v9, (a1)
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vmulhu.vv v8, v8, v9
; RV32-NEXT:    lui a1, 32
; RV32-NEXT:    addi a1, a1, 1
; RV32-NEXT:    vmv.s.x v9, a1
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v9
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vv v8, v8, v10
; RV32-NEXT:    vse64.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: mulhu_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    lui a1, 838861
; RV64-NEXT:    addiw a1, a1, -819
; RV64-NEXT:    slli a2, a1, 32
; RV64-NEXT:    add a1, a1, a2
; RV64-NEXT:    vmv.v.x v9, a1
; RV64-NEXT:    lui a1, 699051
; RV64-NEXT:    addiw a1, a1, -1365
; RV64-NEXT:    slli a2, a1, 32
; RV64-NEXT:    add a1, a1, a2
; RV64-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
; RV64-NEXT:    vmv.s.x v9, a1
; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; RV64-NEXT:    vmulhu.vv v8, v8, v9
; RV64-NEXT:    vid.v v9
; RV64-NEXT:    vadd.vi v9, v9, 1
; RV64-NEXT:    vsrl.vv v8, v8, v9
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = udiv <2 x i64> %a, <i64 3, i64 5>
  store <2 x i64> %b, ptr %x
  ret void
}

define void @mulhs_v16i8(ptr %x) {
; CHECK-LABEL: mulhs_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    li a1, -123
; CHECK-NEXT:    vmv.v.x v9, a1
; CHECK-NEXT:    lui a1, 5
; CHECK-NEXT:    addi a1, a1, -1452
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    li a1, 57
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vmerge.vxm v9, v9, a1, v0
; CHECK-NEXT:    vmulhu.vv v8, v8, v9
; CHECK-NEXT:    vmv.v.i v9, 7
; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
; CHECK-NEXT:    vsrl.vv v8, v8, v9
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = udiv <16 x i8> %a, <i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9>
  store <16 x i8> %b, ptr %x
  ret void
}

define void @mulhs_v8i16(ptr %x) {
; CHECK-LABEL: mulhs_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    lui a1, 5
; CHECK-NEXT:    addi a1, a1, -1755
; CHECK-NEXT:    vmv.v.x v9, a1
; CHECK-NEXT:    li a1, 105
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    lui a1, 1048571
; CHECK-NEXT:    addi a1, a1, 1755
; CHECK-NEXT:    vmerge.vxm v9, v9, a1, v0
; CHECK-NEXT:    vmulh.vv v8, v8, v9
; CHECK-NEXT:    vsra.vi v8, v8, 1
; CHECK-NEXT:    vsrl.vi v9, v8, 15
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = sdiv <8 x i16> %a, <i16 -7, i16 7, i16 7, i16 -7, i16 7, i16 -7, i16 -7, i16 7>
  store <8 x i16> %b, ptr %x
  ret void
}

define void @mulhs_v6i16(ptr %x) {
; CHECK-LABEL: mulhs_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vmv.v.i v9, 7
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    li a1, -14
; CHECK-NEXT:    vmadd.vx v10, a1, v9
; CHECK-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v9, v8, 4
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vdiv.vv v9, v9, v10
; CHECK-NEXT:    lui a1, 1020016
; CHECK-NEXT:    addi a1, a1, 2041
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vmv.s.x v10, a1
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vsext.vf2 v11, v10
; CHECK-NEXT:    vdiv.vv v8, v8, v11
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 4
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = sdiv <6 x i16> %a, <i16 -7, i16 7, i16 7, i16 -7, i16 7, i16 -7>
  store <6 x i16> %b, ptr %x
  ret void
}

define void @mulhs_v4i32(ptr %x) {
; RV32-LABEL: mulhs_v4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vle32.v v8, (a0)
; RV32-NEXT:    lui a1, 419430
; RV32-NEXT:    addi a1, a1, 1639
; RV32-NEXT:    vmv.v.x v9, a1
; RV32-NEXT:    vmv.v.i v0, 5
; RV32-NEXT:    lui a1, 629146
; RV32-NEXT:    addi a1, a1, -1639
; RV32-NEXT:    vmerge.vxm v9, v9, a1, v0
; RV32-NEXT:    vmulh.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v9, v8, 31
; RV32-NEXT:    vsra.vi v8, v8, 1
; RV32-NEXT:    vadd.vv v8, v8, v9
; RV32-NEXT:    vse32.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: mulhs_v4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT:    vle32.v v8, (a0)
; RV64-NEXT:    lui a1, %hi(.LCPI73_0)
; RV64-NEXT:    addi a1, a1, %lo(.LCPI73_0)
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vlse64.v v9, (a1), zero
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT:    vmulh.vv v8, v8, v9
; RV64-NEXT:    vsra.vi v8, v8, 1
; RV64-NEXT:    vsrl.vi v9, v8, 31
; RV64-NEXT:    vadd.vv v8, v8, v9
; RV64-NEXT:    vse32.v v8, (a0)
; RV64-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = sdiv <4 x i32> %a, <i32 -5, i32 5, i32 -5, i32 5>
  store <4 x i32> %b, ptr %x
  ret void
}

define void @mulhs_v2i64(ptr %x) {
; RV32-LABEL: mulhs_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a2, a1, 1365
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v9, a2
; RV32-NEXT:    addi a1, a1, 1366
; RV32-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
; RV32-NEXT:    vmv.s.x v9, a1
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vmulh.vv v9, v8, v9
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vid.v v10
; RV32-NEXT:    vsrl.vi v10, v10, 1
; RV32-NEXT:    vrsub.vi v10, v10, 0
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vmadd.vv v10, v8, v9
; RV32-NEXT:    li a1, 63
; RV32-NEXT:    vsrl.vx v8, v10, a1
; RV32-NEXT:    lui a1, 16
; RV32-NEXT:    vmv.s.x v9, a1
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vsext.vf4 v11, v9
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vsra.vv v9, v10, v11
; RV32-NEXT:    vadd.vv v8, v9, v8
; RV32-NEXT:    vse64.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: mulhs_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    slli a2, a1, 32
; RV64-NEXT:    add a1, a1, a2
; RV64-NEXT:    lui a2, %hi(.LCPI74_0)
; RV64-NEXT:    ld a2, %lo(.LCPI74_0)(a2)
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    vmv.v.x v9, a1
; RV64-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
; RV64-NEXT:    vmv.s.x v9, a2
; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; RV64-NEXT:    vmulh.vv v9, v8, v9
; RV64-NEXT:    vid.v v10
; RV64-NEXT:    vrsub.vi v11, v10, 0
; RV64-NEXT:    vmadd.vv v11, v8, v9
; RV64-NEXT:    li a1, 63
; RV64-NEXT:    vsrl.vx v8, v11, a1
; RV64-NEXT:    vsra.vv v9, v11, v10
; RV64-NEXT:    vadd.vv v8, v9, v8
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = sdiv <2 x i64> %a, <i64 3, i64 -3>
  store <2 x i64> %b, ptr %x
  ret void
}

define void @smin_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: smin_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vmin.vv v8, v8, v9
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %cc = icmp slt <16 x i8> %a, %b
  %c = select <16 x i1> %cc, <16 x i8> %a, <16 x i8> %b
  store <16 x i8> %c, ptr %x
  ret void
}

define void @smin_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: smin_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vmin.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %cc = icmp slt <8 x i16> %a, %b
  %c = select <8 x i1> %cc, <8 x i16> %a, <8 x i16> %b
  store <8 x i16> %c, ptr %x
  ret void
}

define void @smin_v6i16(ptr %x, ptr %y) {
; CHECK-LABEL: smin_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmin.vv v8, v8, v9
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = load <6 x i16>, ptr %y
  %cc = icmp slt <6 x i16> %a, %b
  %c = select <6 x i1> %cc, <6 x i16> %a, <6 x i16> %b
  store <6 x i16> %c, ptr %x
  ret void
}

define void @smin_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: smin_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vmin.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %cc = icmp slt <4 x i32> %a, %b
  %c = select <4 x i1> %cc, <4 x i32> %a, <4 x i32> %b
  store <4 x i32> %c, ptr %x
  ret void
}

define void @smin_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: smin_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vmin.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %cc = icmp slt <2 x i64> %a, %b
  %c = select <2 x i1> %cc, <2 x i64> %a, <2 x i64> %b
  store <2 x i64> %c, ptr %x
  ret void
}

define void @smin_vx_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: smin_vx_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vmin.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %a, <16 x i8> %c)
  store <16 x i8> %d, ptr %x
  ret void
}
declare <16 x i8> @llvm.smin.v16i8(<16 x i8>, <16 x i8>)

define void @smin_vx_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: smin_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vmin.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %a, <8 x i16> %c)
  store <8 x i16> %d, ptr %x
  ret void
}
declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>)

define void @smin_vx_v6i16(ptr %x, i16 %y) {
; CHECK-LABEL: smin_vx_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmin.vx v8, v8, a1
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = insertelement <6 x i16> poison, i16 %y, i32 0
  %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer
  %d = call <6 x i16> @llvm.smin.v6i16(<6 x i16> %a, <6 x i16> %c)
  store <6 x i16> %d, ptr %x
  ret void
}
declare <6 x i16> @llvm.smin.v6i16(<6 x i16>, <6 x i16>)

define void @smin_vx_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: smin_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmin.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %a, <4 x i32> %c)
  store <4 x i32> %d, ptr %x
  ret void
}
declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)

define void @smin_xv_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: smin_xv_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vmin.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %c, <16 x i8> %a)
  store <16 x i8> %d, ptr %x
  ret void
}

define void @smin_xv_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: smin_xv_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vmin.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %c, <8 x i16> %a)
  store <8 x i16> %d, ptr %x
  ret void
}

define void @smin_xv_v6i16(ptr %x, i16 %y) {
; CHECK-LABEL: smin_xv_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmin.vx v8, v8, a1
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = insertelement <6 x i16> poison, i16 %y, i32 0
  %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer
  %d = call <6 x i16> @llvm.smin.v6i16(<6 x i16> %c, <6 x i16> %a)
  store <6 x i16> %d, ptr %x
  ret void
}

define void @smin_xv_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: smin_xv_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmin.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %c, <4 x i32> %a)
  store <4 x i32> %d, ptr %x
  ret void
}

define void @smax_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: smax_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vmax.vv v8, v8, v9
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %cc = icmp sgt <16 x i8> %a, %b
  %c = select <16 x i1> %cc, <16 x i8> %a, <16 x i8> %b
  store <16 x i8> %c, ptr %x
  ret void
}

define void @smax_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: smax_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vmax.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %cc = icmp sgt <8 x i16> %a, %b
  %c = select <8 x i1> %cc, <8 x i16> %a, <8 x i16> %b
  store <8 x i16> %c, ptr %x
  ret void
}

define void @smax_v6i16(ptr %x, ptr %y) {
; CHECK-LABEL: smax_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmax.vv v8, v8, v9
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = load <6 x i16>, ptr %y
  %cc = icmp sgt <6 x i16> %a, %b
  %c = select <6 x i1> %cc, <6 x i16> %a, <6 x i16> %b
  store <6 x i16> %c, ptr %x
  ret void
}

define void @smax_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: smax_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vmax.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %cc = icmp sgt <4 x i32> %a, %b
  %c = select <4 x i1> %cc, <4 x i32> %a, <4 x i32> %b
  store <4 x i32> %c, ptr %x
  ret void
}

define void @smax_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: smax_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vmax.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %cc = icmp sgt <2 x i64> %a, %b
  %c = select <2 x i1> %cc, <2 x i64> %a, <2 x i64> %b
  store <2 x i64> %c, ptr %x
  ret void
}

define void @smax_vx_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: smax_vx_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %a, <16 x i8> %c)
  store <16 x i8> %d, ptr %x
  ret void
}
declare <16 x i8> @llvm.smax.v16i8(<16 x i8>, <16 x i8>)

define void @smax_vx_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: smax_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %a, <8 x i16> %c)
  store <8 x i16> %d, ptr %x
  ret void
}
declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>)

define void @smax_vx_v6i16(ptr %x, i16 %y) {
; CHECK-LABEL: smax_vx_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmax.vx v8, v8, a1
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = insertelement <6 x i16> poison, i16 %y, i32 0
  %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer
  %d = call <6 x i16> @llvm.smax.v6i16(<6 x i16> %a, <6 x i16> %c)
  store <6 x i16> %d, ptr %x
  ret void
}
declare <6 x i16> @llvm.smax.v6i16(<6 x i16>, <6 x i16>)

define void @smax_vx_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: smax_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> %c)
  store <4 x i32> %d, ptr %x
  ret void
}
declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)

define void @smax_xv_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: smax_xv_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %c, <16 x i8> %a)
  store <16 x i8> %d, ptr %x
  ret void
}

define void @smax_xv_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: smax_xv_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %c, <8 x i16> %a)
  store <8 x i16> %d, ptr %x
  ret void
}

define void @smax_xv_v6i16(ptr %x, i16 %y) {
; CHECK-LABEL: smax_xv_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmax.vx v8, v8, a1
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = insertelement <6 x i16> poison, i16 %y, i32 0
  %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer
  %d = call <6 x i16> @llvm.smax.v6i16(<6 x i16> %c, <6 x i16> %a)
  store <6 x i16> %d, ptr %x
  ret void
}

define void @smax_xv_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: smax_xv_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %c, <4 x i32> %a)
  store <4 x i32> %d, ptr %x
  ret void
}

define void @umin_v16i8(ptr %x, ptr %y) {
1934 ; CHECK-LABEL: umin_v16i8:
1936 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
1937 ; CHECK-NEXT: vle8.v v8, (a0)
1938 ; CHECK-NEXT: vle8.v v9, (a1)
1939 ; CHECK-NEXT: vminu.vv v8, v8, v9
1940 ; CHECK-NEXT: vse8.v v8, (a0)
1942 %a = load <16 x i8>, ptr %x
1943 %b = load <16 x i8>, ptr %y
1944 %cc = icmp ult <16 x i8> %a, %b
1945 %c = select <16 x i1> %cc, <16 x i8> %a, <16 x i8> %b
1946 store <16 x i8> %c, ptr %x
1950 define void @umin_v8i16(ptr %x, ptr %y) {
1951 ; CHECK-LABEL: umin_v8i16:
1953 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1954 ; CHECK-NEXT: vle16.v v8, (a0)
1955 ; CHECK-NEXT: vle16.v v9, (a1)
1956 ; CHECK-NEXT: vminu.vv v8, v8, v9
1957 ; CHECK-NEXT: vse16.v v8, (a0)
1959 %a = load <8 x i16>, ptr %x
1960 %b = load <8 x i16>, ptr %y
1961 %cc = icmp ult <8 x i16> %a, %b
1962 %c = select <8 x i1> %cc, <8 x i16> %a, <8 x i16> %b
1963 store <8 x i16> %c, ptr %x
define void @umin_v6i16(ptr %x, ptr %y) {
; CHECK-LABEL: umin_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vminu.vv v8, v8, v9
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = load <6 x i16>, ptr %y
  %cc = icmp ult <6 x i16> %a, %b
  %c = select <6 x i1> %cc, <6 x i16> %a, <6 x i16> %b
  store <6 x i16> %c, ptr %x
  ret void
}

define void @umin_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: umin_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vminu.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %cc = icmp ult <4 x i32> %a, %b
  %c = select <4 x i1> %cc, <4 x i32> %a, <4 x i32> %b
  store <4 x i32> %c, ptr %x
  ret void
}

define void @umin_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: umin_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vminu.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %cc = icmp ult <2 x i64> %a, %b
  %c = select <2 x i1> %cc, <2 x i64> %a, <2 x i64> %b
  store <2 x i64> %c, ptr %x
  ret void
}

define void @umin_vx_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: umin_vx_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vminu.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %a, <16 x i8> %c)
  store <16 x i8> %d, ptr %x
  ret void
}

declare <16 x i8> @llvm.umin.v16i8(<16 x i8>, <16 x i8>)

define void @umin_vx_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: umin_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vminu.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %a, <8 x i16> %c)
  store <8 x i16> %d, ptr %x
  ret void
}

declare <8 x i16> @llvm.umin.v8i16(<8 x i16>, <8 x i16>)

define void @umin_vx_v6i16(ptr %x, i16 %y) {
; CHECK-LABEL: umin_vx_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vminu.vx v8, v8, a1
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = insertelement <6 x i16> poison, i16 %y, i32 0
  %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer
  %d = call <6 x i16> @llvm.umin.v6i16(<6 x i16> %a, <6 x i16> %c)
  store <6 x i16> %d, ptr %x
  ret void
}

declare <6 x i16> @llvm.umin.v6i16(<6 x i16>, <6 x i16>)

define void @umin_vx_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: umin_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vminu.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %a, <4 x i32> %c)
  store <4 x i32> %d, ptr %x
  ret void
}

declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)

define void @umin_xv_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: umin_xv_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vminu.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %c, <16 x i8> %a)
  store <16 x i8> %d, ptr %x
  ret void
}

define void @umin_xv_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: umin_xv_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vminu.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %c, <8 x i16> %a)
  store <8 x i16> %d, ptr %x
  ret void
}

define void @umin_xv_v6i16(ptr %x, i16 %y) {
; CHECK-LABEL: umin_xv_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vminu.vx v8, v8, a1
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = insertelement <6 x i16> poison, i16 %y, i32 0
  %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer
  %d = call <6 x i16> @llvm.umin.v6i16(<6 x i16> %c, <6 x i16> %a)
  store <6 x i16> %d, ptr %x
  ret void
}

define void @umin_xv_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: umin_xv_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vminu.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %c, <4 x i32> %a)
  store <4 x i32> %d, ptr %x
  ret void
}

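; Same structure for umax: icmp ugt + select in the vector-vector tests and
; @llvm.umax in the scalar-splat tests, all expected to lower to vmaxu.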
define void @umax_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: umax_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vmaxu.vv v8, v8, v9
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %cc = icmp ugt <16 x i8> %a, %b
  %c = select <16 x i1> %cc, <16 x i8> %a, <16 x i8> %b
  store <16 x i8> %c, ptr %x
  ret void
}

define void @umax_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: umax_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vmaxu.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %cc = icmp ugt <8 x i16> %a, %b
  %c = select <8 x i1> %cc, <8 x i16> %a, <8 x i16> %b
  store <8 x i16> %c, ptr %x
  ret void
}

define void @umax_v6i16(ptr %x, ptr %y) {
; CHECK-LABEL: umax_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmaxu.vv v8, v8, v9
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = load <6 x i16>, ptr %y
  %cc = icmp ugt <6 x i16> %a, %b
  %c = select <6 x i1> %cc, <6 x i16> %a, <6 x i16> %b
  store <6 x i16> %c, ptr %x
  ret void
}

define void @umax_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: umax_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vmaxu.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %cc = icmp ugt <4 x i32> %a, %b
  %c = select <4 x i1> %cc, <4 x i32> %a, <4 x i32> %b
  store <4 x i32> %c, ptr %x
  ret void
}

define void @umax_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: umax_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vmaxu.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %cc = icmp ugt <2 x i64> %a, %b
  %c = select <2 x i1> %cc, <2 x i64> %a, <2 x i64> %b
  store <2 x i64> %c, ptr %x
  ret void
}

define void @umax_vx_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: umax_vx_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vmaxu.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %a, <16 x i8> %c)
  store <16 x i8> %d, ptr %x
  ret void
}

declare <16 x i8> @llvm.umax.v16i8(<16 x i8>, <16 x i8>)

define void @umax_vx_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: umax_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vmaxu.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = call <8 x i16> @llvm.umax.v8i16(<8 x i16> %a, <8 x i16> %c)
  store <8 x i16> %d, ptr %x
  ret void
}

declare <8 x i16> @llvm.umax.v8i16(<8 x i16>, <8 x i16>)

define void @umax_vx_v6i16(ptr %x, i16 %y) {
; CHECK-LABEL: umax_vx_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmaxu.vx v8, v8, a1
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = insertelement <6 x i16> poison, i16 %y, i32 0
  %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer
  %d = call <6 x i16> @llvm.umax.v6i16(<6 x i16> %a, <6 x i16> %c)
  store <6 x i16> %d, ptr %x
  ret void
}

declare <6 x i16> @llvm.umax.v6i16(<6 x i16>, <6 x i16>)

define void @umax_vx_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: umax_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmaxu.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %a, <4 x i32> %c)
  store <4 x i32> %d, ptr %x
  ret void
}

declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>)

define void @umax_xv_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: umax_xv_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vmaxu.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %c, <16 x i8> %a)
  store <16 x i8> %d, ptr %x
  ret void
}

define void @umax_xv_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: umax_xv_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vmaxu.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = call <8 x i16> @llvm.umax.v8i16(<8 x i16> %c, <8 x i16> %a)
  store <8 x i16> %d, ptr %x
  ret void
}

define void @umax_xv_v6i16(ptr %x, i16 %y) {
; CHECK-LABEL: umax_xv_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmaxu.vx v8, v8, a1
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = insertelement <6 x i16> poison, i16 %y, i32 0
  %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer
  %d = call <6 x i16> @llvm.umax.v6i16(<6 x i16> %c, <6 x i16> %a)
  store <6 x i16> %d, ptr %x
  ret void
}

define void @umax_xv_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: umax_xv_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmaxu.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %c, <4 x i32> %a)
  store <4 x i32> %d, ptr %x
  ret void
}

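; The remaining tests use 256-bit vectors, which no longer fit in a single
; vector register at LMUL=1. With -riscv-v-fixed-length-vector-lmul-max=2 each
; operation is a single m2 instruction; with the limit at 1 it is split into
; two m1 halves, loaded and stored at byte offsets 0 and 16.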
define void @add_v32i8(ptr %x, ptr %y) {
; LMULMAX2-LABEL: add_v32i8:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a2, 32
; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; LMULMAX2-NEXT:    vle8.v v8, (a0)
; LMULMAX2-NEXT:    vle8.v v10, (a1)
; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
; LMULMAX2-NEXT:    vse8.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: add_v32i8:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: add_v32i8:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vadd.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = add <32 x i8> %a, %b
  store <32 x i8> %c, ptr %x
  ret void
}

define void @add_v16i16(ptr %x, ptr %y) {
; LMULMAX2-LABEL: add_v16i16:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT:    vle16.v v8, (a0)
; LMULMAX2-NEXT:    vle16.v v10, (a1)
; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
; LMULMAX2-NEXT:    vse16.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: add_v16i16:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: add_v16i16:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vadd.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = add <16 x i16> %a, %b
  store <16 x i16> %c, ptr %x
  ret void
}

define void @add_v8i32(ptr %x, ptr %y) {
; LMULMAX2-LABEL: add_v8i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vle32.v v10, (a1)
; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: add_v8i32:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: add_v8i32:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vadd.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = add <8 x i32> %a, %b
  store <8 x i32> %c, ptr %x
  ret void
}

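; <6 x i32> is a non-power-of-two 256-bit case: under LMULMAX1 the low four
; elements get a full m1 vadd, while the two-element tail is handled with an
; mf2 vadd/vse32 on RV32 and is stored as a single e64 element on RV64.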
define void @add_v6i32(ptr %x, ptr %y) {
; LMULMAX2-LABEL: add_v6i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vle32.v v10, (a1)
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
; LMULMAX2-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: add_v6i32:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vle32.v v9, (a1)
; LMULMAX1-RV32-NEXT:    vle32.v v10, (a2)
; LMULMAX1-RV32-NEXT:    addi a1, a1, 16
; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v9
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; LMULMAX1-RV32-NEXT:    vadd.vv v8, v10, v11
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: add_v6i32:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vadd.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <6 x i32>, ptr %x
  %b = load <6 x i32>, ptr %y
  %c = add <6 x i32> %a, %b
  store <6 x i32> %c, ptr %x
  ret void
}

define void @add_v4i64(ptr %x, ptr %y) {
; LMULMAX2-LABEL: add_v4i64:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT:    vle64.v v8, (a0)
; LMULMAX2-NEXT:    vle64.v v10, (a1)
; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
; LMULMAX2-NEXT:    vse64.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: add_v4i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: add_v4i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vadd.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = add <4 x i64> %a, %b
  store <4 x i64> %c, ptr %x
  ret void
}

define void @sub_v32i8(ptr %x, ptr %y) {
; LMULMAX2-LABEL: sub_v32i8:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a2, 32
; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; LMULMAX2-NEXT:    vle8.v v8, (a0)
; LMULMAX2-NEXT:    vle8.v v10, (a1)
; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
; LMULMAX2-NEXT:    vse8.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: sub_v32i8:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vsub.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: sub_v32i8:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vsub.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vsub.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = sub <32 x i8> %a, %b
  store <32 x i8> %c, ptr %x
  ret void
}

define void @sub_v16i16(ptr %x, ptr %y) {
; LMULMAX2-LABEL: sub_v16i16:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT:    vle16.v v8, (a0)
; LMULMAX2-NEXT:    vle16.v v10, (a1)
; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
; LMULMAX2-NEXT:    vse16.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: sub_v16i16:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vsub.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: sub_v16i16:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vsub.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vsub.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = sub <16 x i16> %a, %b
  store <16 x i16> %c, ptr %x
  ret void
}

define void @sub_v8i32(ptr %x, ptr %y) {
; LMULMAX2-LABEL: sub_v8i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vle32.v v10, (a1)
; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: sub_v8i32:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vsub.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: sub_v8i32:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vsub.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vsub.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = sub <8 x i32> %a, %b
  store <8 x i32> %c, ptr %x
  ret void
}

define void @sub_v4i64(ptr %x, ptr %y) {
; LMULMAX2-LABEL: sub_v4i64:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT:    vle64.v v8, (a0)
; LMULMAX2-NEXT:    vle64.v v10, (a1)
; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
; LMULMAX2-NEXT:    vse64.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: sub_v4i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vsub.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: sub_v4i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vsub.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vsub.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = sub <4 x i64> %a, %b
  store <4 x i64> %c, ptr %x
  ret void
}

define void @mul_v32i8(ptr %x, ptr %y) {
; LMULMAX2-LABEL: mul_v32i8:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a2, 32
; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; LMULMAX2-NEXT:    vle8.v v8, (a0)
; LMULMAX2-NEXT:    vle8.v v10, (a1)
; LMULMAX2-NEXT:    vmul.vv v8, v8, v10
; LMULMAX2-NEXT:    vse8.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: mul_v32i8:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vmul.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vmul.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: mul_v32i8:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vmul.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vmul.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = mul <32 x i8> %a, %b
  store <32 x i8> %c, ptr %x
  ret void
}

define void @mul_v16i16(ptr %x, ptr %y) {
; LMULMAX2-LABEL: mul_v16i16:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT:    vle16.v v8, (a0)
; LMULMAX2-NEXT:    vle16.v v10, (a1)
; LMULMAX2-NEXT:    vmul.vv v8, v8, v10
; LMULMAX2-NEXT:    vse16.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: mul_v16i16:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vmul.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vmul.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: mul_v16i16:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vmul.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vmul.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = mul <16 x i16> %a, %b
  store <16 x i16> %c, ptr %x
  ret void
}

define void @mul_v8i32(ptr %x, ptr %y) {
; LMULMAX2-LABEL: mul_v8i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vle32.v v10, (a1)
; LMULMAX2-NEXT:    vmul.vv v8, v8, v10
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: mul_v8i32:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vmul.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vmul.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: mul_v8i32:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vmul.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vmul.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = mul <8 x i32> %a, %b
  store <8 x i32> %c, ptr %x
  ret void
}

define void @mul_v4i64(ptr %x, ptr %y) {
; LMULMAX2-LABEL: mul_v4i64:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT:    vle64.v v8, (a0)
; LMULMAX2-NEXT:    vle64.v v10, (a1)
; LMULMAX2-NEXT:    vmul.vv v8, v8, v10
; LMULMAX2-NEXT:    vse64.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: mul_v4i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vmul.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vmul.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: mul_v4i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vmul.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vmul.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = mul <4 x i64> %a, %b
  store <4 x i64> %c, ptr %x
  ret void
}

define void @and_v32i8(ptr %x, ptr %y) {
; LMULMAX2-LABEL: and_v32i8:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a2, 32
; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; LMULMAX2-NEXT:    vle8.v v8, (a0)
; LMULMAX2-NEXT:    vle8.v v10, (a1)
; LMULMAX2-NEXT:    vand.vv v8, v8, v10
; LMULMAX2-NEXT:    vse8.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: and_v32i8:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: and_v32i8:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vand.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vand.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = and <32 x i8> %a, %b
  store <32 x i8> %c, ptr %x
  ret void
}

define void @and_v16i16(ptr %x, ptr %y) {
; LMULMAX2-LABEL: and_v16i16:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT:    vle16.v v8, (a0)
; LMULMAX2-NEXT:    vle16.v v10, (a1)
; LMULMAX2-NEXT:    vand.vv v8, v8, v10
; LMULMAX2-NEXT:    vse16.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: and_v16i16:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: and_v16i16:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vand.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vand.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = and <16 x i16> %a, %b
  store <16 x i16> %c, ptr %x
  ret void
}

define void @and_v8i32(ptr %x, ptr %y) {
; LMULMAX2-LABEL: and_v8i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vle32.v v10, (a1)
; LMULMAX2-NEXT:    vand.vv v8, v8, v10
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: and_v8i32:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: and_v8i32:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vand.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vand.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = and <8 x i32> %a, %b
  store <8 x i32> %c, ptr %x
  ret void
}

define void @and_v4i64(ptr %x, ptr %y) {
; LMULMAX2-LABEL: and_v4i64:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT:    vle64.v v8, (a0)
; LMULMAX2-NEXT:    vle64.v v10, (a1)
; LMULMAX2-NEXT:    vand.vv v8, v8, v10
; LMULMAX2-NEXT:    vse64.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: and_v4i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: and_v4i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vand.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vand.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = and <4 x i64> %a, %b
  store <4 x i64> %c, ptr %x
  ret void
}

define void @or_v32i8(ptr %x, ptr %y) {
; LMULMAX2-LABEL: or_v32i8:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a2, 32
; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; LMULMAX2-NEXT:    vle8.v v8, (a0)
; LMULMAX2-NEXT:    vle8.v v10, (a1)
; LMULMAX2-NEXT:    vor.vv v8, v8, v10
; LMULMAX2-NEXT:    vse8.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: or_v32i8:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: or_v32i8:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vor.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = or <32 x i8> %a, %b
  store <32 x i8> %c, ptr %x
  ret void
}

define void @or_v16i16(ptr %x, ptr %y) {
; LMULMAX2-LABEL: or_v16i16:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT:    vle16.v v8, (a0)
; LMULMAX2-NEXT:    vle16.v v10, (a1)
; LMULMAX2-NEXT:    vor.vv v8, v8, v10
; LMULMAX2-NEXT:    vse16.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: or_v16i16:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: or_v16i16:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vor.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = or <16 x i16> %a, %b
  store <16 x i16> %c, ptr %x
  ret void
}

define void @or_v8i32(ptr %x, ptr %y) {
; LMULMAX2-LABEL: or_v8i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vle32.v v10, (a1)
; LMULMAX2-NEXT:    vor.vv v8, v8, v10
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: or_v8i32:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: or_v8i32:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vor.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = or <8 x i32> %a, %b
  store <8 x i32> %c, ptr %x
  ret void
}

define void @or_v4i64(ptr %x, ptr %y) {
; LMULMAX2-LABEL: or_v4i64:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT:    vle64.v v8, (a0)
; LMULMAX2-NEXT:    vle64.v v10, (a1)
; LMULMAX2-NEXT:    vor.vv v8, v8, v10
; LMULMAX2-NEXT:    vse64.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: or_v4i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: or_v4i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vor.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = or <4 x i64> %a, %b
  store <4 x i64> %c, ptr %x
  ret void
}

define void @xor_v32i8(ptr %x, ptr %y) {
; LMULMAX2-LABEL: xor_v32i8:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a2, 32
; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; LMULMAX2-NEXT:    vle8.v v8, (a0)
; LMULMAX2-NEXT:    vle8.v v10, (a1)
; LMULMAX2-NEXT:    vxor.vv v8, v8, v10
; LMULMAX2-NEXT:    vse8.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: xor_v32i8:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vxor.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vxor.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: xor_v32i8:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vxor.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vxor.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = xor <32 x i8> %a, %b
  store <32 x i8> %c, ptr %x
  ret void
}

define void @xor_v16i16(ptr %x, ptr %y) {
; LMULMAX2-LABEL: xor_v16i16:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT:    vle16.v v8, (a0)
; LMULMAX2-NEXT:    vle16.v v10, (a1)
; LMULMAX2-NEXT:    vxor.vv v8, v8, v10
; LMULMAX2-NEXT:    vse16.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: xor_v16i16:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vxor.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vxor.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: xor_v16i16:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vxor.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vxor.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = xor <16 x i16> %a, %b
  store <16 x i16> %c, ptr %x
  ret void
}

define void @xor_v8i32(ptr %x, ptr %y) {
; LMULMAX2-LABEL: xor_v8i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vle32.v v10, (a1)
; LMULMAX2-NEXT:    vxor.vv v8, v8, v10
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: xor_v8i32:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vxor.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vxor.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: xor_v8i32:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vxor.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vxor.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = xor <8 x i32> %a, %b
  store <8 x i32> %c, ptr %x
  ret void
}

define void @xor_v4i64(ptr %x, ptr %y) {
; LMULMAX2-LABEL: xor_v4i64:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT:    vle64.v v8, (a0)
; LMULMAX2-NEXT:    vle64.v v10, (a1)
; LMULMAX2-NEXT:    vxor.vv v8, v8, v10
; LMULMAX2-NEXT:    vse64.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: xor_v4i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vxor.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vxor.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: xor_v4i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vxor.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vxor.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = xor <4 x i64> %a, %b
  store <4 x i64> %c, ptr %x
  ret void
}

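; The shift tests below use per-element shift amounts, so they are expected to
; lower to the vector-vector form (vsrl.vv) rather than a shift by scalar.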
define void @lshr_v32i8(ptr %x, ptr %y) {
; LMULMAX2-LABEL: lshr_v32i8:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a2, 32
; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; LMULMAX2-NEXT:    vle8.v v8, (a0)
; LMULMAX2-NEXT:    vle8.v v10, (a1)
; LMULMAX2-NEXT:    vsrl.vv v8, v8, v10
; LMULMAX2-NEXT:    vse8.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: lshr_v32i8:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vsrl.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vsrl.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: lshr_v32i8:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vsrl.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vsrl.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = lshr <32 x i8> %a, %b
  store <32 x i8> %c, ptr %x
  ret void
}

define void @lshr_v16i16(ptr %x, ptr %y) {
; LMULMAX2-LABEL: lshr_v16i16:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT:    vle16.v v8, (a0)
; LMULMAX2-NEXT:    vle16.v v10, (a1)
; LMULMAX2-NEXT:    vsrl.vv v8, v8, v10
; LMULMAX2-NEXT:    vse16.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: lshr_v16i16:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vsrl.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vsrl.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: lshr_v16i16:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vsrl.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vsrl.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = lshr <16 x i16> %a, %b
  store <16 x i16> %c, ptr %x
  ret void
}

3632 define void @lshr_v8i32(ptr %x, ptr %y) {
3633 ; LMULMAX2-LABEL: lshr_v8i32:
3634 ; LMULMAX2: # %bb.0:
3635 ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
3636 ; LMULMAX2-NEXT: vle32.v v8, (a0)
3637 ; LMULMAX2-NEXT: vle32.v v10, (a1)
3638 ; LMULMAX2-NEXT: vsrl.vv v8, v8, v10
3639 ; LMULMAX2-NEXT: vse32.v v8, (a0)
3640 ; LMULMAX2-NEXT: ret
3642 ; LMULMAX1-RV32-LABEL: lshr_v8i32:
3643 ; LMULMAX1-RV32: # %bb.0:
3644 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3645 ; LMULMAX1-RV32-NEXT: vle32.v v8, (a0)
3646 ; LMULMAX1-RV32-NEXT: addi a2, a0, 16
3647 ; LMULMAX1-RV32-NEXT: vle32.v v9, (a2)
3648 ; LMULMAX1-RV32-NEXT: addi a3, a1, 16
3649 ; LMULMAX1-RV32-NEXT: vle32.v v10, (a3)
3650 ; LMULMAX1-RV32-NEXT: vle32.v v11, (a1)
3651 ; LMULMAX1-RV32-NEXT: vsrl.vv v9, v9, v10
3652 ; LMULMAX1-RV32-NEXT: vsrl.vv v8, v8, v11
3653 ; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
3654 ; LMULMAX1-RV32-NEXT: vse32.v v9, (a2)
3655 ; LMULMAX1-RV32-NEXT: ret
3657 ; LMULMAX1-RV64-LABEL: lshr_v8i32:
3658 ; LMULMAX1-RV64: # %bb.0:
3659 ; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3660 ; LMULMAX1-RV64-NEXT: vle32.v v8, (a0)
3661 ; LMULMAX1-RV64-NEXT: addi a2, a1, 16
3662 ; LMULMAX1-RV64-NEXT: vle32.v v9, (a2)
3663 ; LMULMAX1-RV64-NEXT: addi a2, a0, 16
3664 ; LMULMAX1-RV64-NEXT: vle32.v v10, (a2)
3665 ; LMULMAX1-RV64-NEXT: vle32.v v11, (a1)
3666 ; LMULMAX1-RV64-NEXT: vsrl.vv v9, v10, v9
3667 ; LMULMAX1-RV64-NEXT: vsrl.vv v8, v8, v11
3668 ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
3669 ; LMULMAX1-RV64-NEXT: vse32.v v9, (a2)
3670 ; LMULMAX1-RV64-NEXT: ret
3671 %a = load <8 x i32>, ptr %x
3672 %b = load <8 x i32>, ptr %y
3673 %c = lshr <8 x i32> %a, %b
3674 store <8 x i32> %c, ptr %x
3678 define void @lshr_v4i64(ptr %x, ptr %y) {
3679 ; LMULMAX2-LABEL: lshr_v4i64:
3680 ; LMULMAX2: # %bb.0:
3681 ; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma
3682 ; LMULMAX2-NEXT: vle64.v v8, (a0)
3683 ; LMULMAX2-NEXT: vle64.v v10, (a1)
3684 ; LMULMAX2-NEXT: vsrl.vv v8, v8, v10
3685 ; LMULMAX2-NEXT: vse64.v v8, (a0)
3686 ; LMULMAX2-NEXT: ret
3688 ; LMULMAX1-RV32-LABEL: lshr_v4i64:
3689 ; LMULMAX1-RV32: # %bb.0:
3690 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
3691 ; LMULMAX1-RV32-NEXT: vle64.v v8, (a0)
3692 ; LMULMAX1-RV32-NEXT: addi a2, a0, 16
3693 ; LMULMAX1-RV32-NEXT: vle64.v v9, (a2)
3694 ; LMULMAX1-RV32-NEXT: addi a3, a1, 16
3695 ; LMULMAX1-RV32-NEXT: vle64.v v10, (a3)
3696 ; LMULMAX1-RV32-NEXT: vle64.v v11, (a1)
3697 ; LMULMAX1-RV32-NEXT: vsrl.vv v9, v9, v10
3698 ; LMULMAX1-RV32-NEXT: vsrl.vv v8, v8, v11
3699 ; LMULMAX1-RV32-NEXT: vse64.v v8, (a0)
3700 ; LMULMAX1-RV32-NEXT: vse64.v v9, (a2)
3701 ; LMULMAX1-RV32-NEXT: ret
3703 ; LMULMAX1-RV64-LABEL: lshr_v4i64:
3704 ; LMULMAX1-RV64: # %bb.0:
3705 ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
3706 ; LMULMAX1-RV64-NEXT: vle64.v v8, (a0)
3707 ; LMULMAX1-RV64-NEXT: addi a2, a1, 16
3708 ; LMULMAX1-RV64-NEXT: vle64.v v9, (a2)
3709 ; LMULMAX1-RV64-NEXT: addi a2, a0, 16
3710 ; LMULMAX1-RV64-NEXT: vle64.v v10, (a2)
3711 ; LMULMAX1-RV64-NEXT: vle64.v v11, (a1)
3712 ; LMULMAX1-RV64-NEXT: vsrl.vv v9, v10, v9
3713 ; LMULMAX1-RV64-NEXT: vsrl.vv v8, v8, v11
3714 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
3715 ; LMULMAX1-RV64-NEXT: vse64.v v9, (a2)
3716 ; LMULMAX1-RV64-NEXT: ret
3717 %a = load <4 x i64>, ptr %x
3718 %b = load <4 x i64>, ptr %y
3719 %c = lshr <4 x i64> %a, %b
3720 store <4 x i64> %c, ptr %x
define void @ashr_v32i8(ptr %x, ptr %y) {
; LMULMAX2-LABEL: ashr_v32i8:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: li a2, 32
; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma
; LMULMAX2-NEXT: vle8.v v8, (a0)
; LMULMAX2-NEXT: vle8.v v10, (a1)
; LMULMAX2-NEXT: vsra.vv v8, v8, v10
; LMULMAX2-NEXT: vse8.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: ashr_v32i8:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle8.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle8.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle8.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle8.v v11, (a1)
; LMULMAX1-RV32-NEXT: vsra.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vsra.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse8.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse8.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: ashr_v32i8:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle8.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle8.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle8.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle8.v v11, (a1)
; LMULMAX1-RV64-NEXT: vsra.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vsra.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <32 x i8>, ptr %x
%b = load <32 x i8>, ptr %y
%c = ashr <32 x i8> %a, %b
store <32 x i8> %c, ptr %x
ret void
}

define void @ashr_v16i16(ptr %x, ptr %y) {
; LMULMAX2-LABEL: ashr_v16i16:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT: vle16.v v8, (a0)
; LMULMAX2-NEXT: vle16.v v10, (a1)
; LMULMAX2-NEXT: vsra.vv v8, v8, v10
; LMULMAX2-NEXT: vse16.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: ashr_v16i16:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT: vsra.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vsra.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: ashr_v16i16:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT: vsra.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vsra.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <16 x i16>, ptr %x
%b = load <16 x i16>, ptr %y
%c = ashr <16 x i16> %a, %b
store <16 x i16> %c, ptr %x
ret void
}

define void @ashr_v8i32(ptr %x, ptr %y) {
; LMULMAX2-LABEL: ashr_v8i32:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT: vle32.v v8, (a0)
; LMULMAX2-NEXT: vle32.v v10, (a1)
; LMULMAX2-NEXT: vsra.vv v8, v8, v10
; LMULMAX2-NEXT: vse32.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: ashr_v8i32:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT: vsra.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vsra.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: ashr_v8i32:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT: vsra.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vsra.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <8 x i32>, ptr %x
%b = load <8 x i32>, ptr %y
%c = ashr <8 x i32> %a, %b
store <8 x i32> %c, ptr %x
ret void
}

define void @ashr_v4i64(ptr %x, ptr %y) {
; LMULMAX2-LABEL: ashr_v4i64:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT: vle64.v v8, (a0)
; LMULMAX2-NEXT: vle64.v v10, (a1)
; LMULMAX2-NEXT: vsra.vv v8, v8, v10
; LMULMAX2-NEXT: vse64.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: ashr_v4i64:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle64.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle64.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle64.v v11, (a1)
; LMULMAX1-RV32-NEXT: vsra.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vsra.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse64.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: ashr_v4i64:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle64.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle64.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle64.v v11, (a1)
; LMULMAX1-RV64-NEXT: vsra.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vsra.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <4 x i64>, ptr %x
%b = load <4 x i64>, ptr %y
%c = ashr <4 x i64> %a, %b
store <4 x i64> %c, ptr %x
ret void
}

define void @shl_v32i8(ptr %x, ptr %y) {
; LMULMAX2-LABEL: shl_v32i8:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: li a2, 32
; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma
; LMULMAX2-NEXT: vle8.v v8, (a0)
; LMULMAX2-NEXT: vle8.v v10, (a1)
; LMULMAX2-NEXT: vsll.vv v8, v8, v10
; LMULMAX2-NEXT: vse8.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: shl_v32i8:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle8.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle8.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle8.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle8.v v11, (a1)
; LMULMAX1-RV32-NEXT: vsll.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vsll.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse8.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse8.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: shl_v32i8:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle8.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle8.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle8.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle8.v v11, (a1)
; LMULMAX1-RV64-NEXT: vsll.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vsll.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <32 x i8>, ptr %x
%b = load <32 x i8>, ptr %y
%c = shl <32 x i8> %a, %b
store <32 x i8> %c, ptr %x
ret void
}

define void @shl_v16i16(ptr %x, ptr %y) {
; LMULMAX2-LABEL: shl_v16i16:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT: vle16.v v8, (a0)
; LMULMAX2-NEXT: vle16.v v10, (a1)
; LMULMAX2-NEXT: vsll.vv v8, v8, v10
; LMULMAX2-NEXT: vse16.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: shl_v16i16:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT: vsll.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vsll.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: shl_v16i16:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT: vsll.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vsll.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <16 x i16>, ptr %x
%b = load <16 x i16>, ptr %y
%c = shl <16 x i16> %a, %b
store <16 x i16> %c, ptr %x
ret void
}

define void @shl_v8i32(ptr %x, ptr %y) {
; LMULMAX2-LABEL: shl_v8i32:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT: vle32.v v8, (a0)
; LMULMAX2-NEXT: vle32.v v10, (a1)
; LMULMAX2-NEXT: vsll.vv v8, v8, v10
; LMULMAX2-NEXT: vse32.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: shl_v8i32:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT: vsll.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vsll.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: shl_v8i32:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT: vsll.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vsll.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <8 x i32>, ptr %x
%b = load <8 x i32>, ptr %y
%c = shl <8 x i32> %a, %b
store <8 x i32> %c, ptr %x
ret void
}

define void @shl_v4i64(ptr %x, ptr %y) {
; LMULMAX2-LABEL: shl_v4i64:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT: vle64.v v8, (a0)
; LMULMAX2-NEXT: vle64.v v10, (a1)
; LMULMAX2-NEXT: vsll.vv v8, v8, v10
; LMULMAX2-NEXT: vse64.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: shl_v4i64:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle64.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle64.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle64.v v11, (a1)
; LMULMAX1-RV32-NEXT: vsll.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vsll.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse64.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: shl_v4i64:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle64.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle64.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle64.v v11, (a1)
; LMULMAX1-RV64-NEXT: vsll.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vsll.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <4 x i64>, ptr %x
%b = load <4 x i64>, ptr %y
%c = shl <4 x i64> %a, %b
store <4 x i64> %c, ptr %x
ret void
}

define void @sdiv_v32i8(ptr %x, ptr %y) {
; LMULMAX2-LABEL: sdiv_v32i8:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: li a2, 32
; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma
; LMULMAX2-NEXT: vle8.v v8, (a0)
; LMULMAX2-NEXT: vle8.v v10, (a1)
; LMULMAX2-NEXT: vdiv.vv v8, v8, v10
; LMULMAX2-NEXT: vse8.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: sdiv_v32i8:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle8.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle8.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle8.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle8.v v11, (a1)
; LMULMAX1-RV32-NEXT: vdiv.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vdiv.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse8.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse8.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: sdiv_v32i8:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle8.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle8.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle8.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle8.v v11, (a1)
; LMULMAX1-RV64-NEXT: vdiv.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vdiv.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <32 x i8>, ptr %x
%b = load <32 x i8>, ptr %y
%c = sdiv <32 x i8> %a, %b
store <32 x i8> %c, ptr %x
ret void
}

define void @sdiv_v16i16(ptr %x, ptr %y) {
; LMULMAX2-LABEL: sdiv_v16i16:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT: vle16.v v8, (a0)
; LMULMAX2-NEXT: vle16.v v10, (a1)
; LMULMAX2-NEXT: vdiv.vv v8, v8, v10
; LMULMAX2-NEXT: vse16.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: sdiv_v16i16:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT: vdiv.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vdiv.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: sdiv_v16i16:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT: vdiv.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vdiv.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <16 x i16>, ptr %x
%b = load <16 x i16>, ptr %y
%c = sdiv <16 x i16> %a, %b
store <16 x i16> %c, ptr %x
ret void
}

define void @sdiv_v8i32(ptr %x, ptr %y) {
; LMULMAX2-LABEL: sdiv_v8i32:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT: vle32.v v8, (a0)
; LMULMAX2-NEXT: vle32.v v10, (a1)
; LMULMAX2-NEXT: vdiv.vv v8, v8, v10
; LMULMAX2-NEXT: vse32.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: sdiv_v8i32:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT: vdiv.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vdiv.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: sdiv_v8i32:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT: vdiv.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vdiv.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <8 x i32>, ptr %x
%b = load <8 x i32>, ptr %y
%c = sdiv <8 x i32> %a, %b
store <8 x i32> %c, ptr %x
ret void
}

define void @sdiv_v4i64(ptr %x, ptr %y) {
; LMULMAX2-LABEL: sdiv_v4i64:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT: vle64.v v8, (a0)
; LMULMAX2-NEXT: vle64.v v10, (a1)
; LMULMAX2-NEXT: vdiv.vv v8, v8, v10
; LMULMAX2-NEXT: vse64.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: sdiv_v4i64:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle64.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle64.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle64.v v11, (a1)
; LMULMAX1-RV32-NEXT: vdiv.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vdiv.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse64.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: sdiv_v4i64:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle64.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle64.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle64.v v11, (a1)
; LMULMAX1-RV64-NEXT: vdiv.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vdiv.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <4 x i64>, ptr %x
%b = load <4 x i64>, ptr %y
%c = sdiv <4 x i64> %a, %b
store <4 x i64> %c, ptr %x
ret void
}

define void @srem_v32i8(ptr %x, ptr %y) {
; LMULMAX2-LABEL: srem_v32i8:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: li a2, 32
; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma
; LMULMAX2-NEXT: vle8.v v8, (a0)
; LMULMAX2-NEXT: vle8.v v10, (a1)
; LMULMAX2-NEXT: vrem.vv v8, v8, v10
; LMULMAX2-NEXT: vse8.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: srem_v32i8:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle8.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle8.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle8.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle8.v v11, (a1)
; LMULMAX1-RV32-NEXT: vrem.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vrem.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse8.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse8.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: srem_v32i8:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle8.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle8.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle8.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle8.v v11, (a1)
; LMULMAX1-RV64-NEXT: vrem.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vrem.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <32 x i8>, ptr %x
%b = load <32 x i8>, ptr %y
%c = srem <32 x i8> %a, %b
store <32 x i8> %c, ptr %x
ret void
}

define void @srem_v16i16(ptr %x, ptr %y) {
; LMULMAX2-LABEL: srem_v16i16:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT: vle16.v v8, (a0)
; LMULMAX2-NEXT: vle16.v v10, (a1)
; LMULMAX2-NEXT: vrem.vv v8, v8, v10
; LMULMAX2-NEXT: vse16.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: srem_v16i16:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT: vrem.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vrem.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: srem_v16i16:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT: vrem.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vrem.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <16 x i16>, ptr %x
%b = load <16 x i16>, ptr %y
%c = srem <16 x i16> %a, %b
store <16 x i16> %c, ptr %x
ret void
}

define void @srem_v8i32(ptr %x, ptr %y) {
; LMULMAX2-LABEL: srem_v8i32:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT: vle32.v v8, (a0)
; LMULMAX2-NEXT: vle32.v v10, (a1)
; LMULMAX2-NEXT: vrem.vv v8, v8, v10
; LMULMAX2-NEXT: vse32.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: srem_v8i32:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT: vrem.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vrem.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: srem_v8i32:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT: vrem.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vrem.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <8 x i32>, ptr %x
%b = load <8 x i32>, ptr %y
%c = srem <8 x i32> %a, %b
store <8 x i32> %c, ptr %x
ret void
}

define void @srem_v4i64(ptr %x, ptr %y) {
; LMULMAX2-LABEL: srem_v4i64:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT: vle64.v v8, (a0)
; LMULMAX2-NEXT: vle64.v v10, (a1)
; LMULMAX2-NEXT: vrem.vv v8, v8, v10
; LMULMAX2-NEXT: vse64.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: srem_v4i64:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle64.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle64.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle64.v v11, (a1)
; LMULMAX1-RV32-NEXT: vrem.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vrem.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse64.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: srem_v4i64:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle64.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle64.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle64.v v11, (a1)
; LMULMAX1-RV64-NEXT: vrem.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vrem.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <4 x i64>, ptr %x
%b = load <4 x i64>, ptr %y
%c = srem <4 x i64> %a, %b
store <4 x i64> %c, ptr %x
ret void
}

define void @udiv_v32i8(ptr %x, ptr %y) {
; LMULMAX2-LABEL: udiv_v32i8:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: li a2, 32
; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma
; LMULMAX2-NEXT: vle8.v v8, (a0)
; LMULMAX2-NEXT: vle8.v v10, (a1)
; LMULMAX2-NEXT: vdivu.vv v8, v8, v10
; LMULMAX2-NEXT: vse8.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: udiv_v32i8:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle8.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle8.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle8.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle8.v v11, (a1)
; LMULMAX1-RV32-NEXT: vdivu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vdivu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse8.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse8.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: udiv_v32i8:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle8.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle8.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle8.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle8.v v11, (a1)
; LMULMAX1-RV64-NEXT: vdivu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vdivu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <32 x i8>, ptr %x
%b = load <32 x i8>, ptr %y
%c = udiv <32 x i8> %a, %b
store <32 x i8> %c, ptr %x
ret void
}

define void @udiv_v16i16(ptr %x, ptr %y) {
; LMULMAX2-LABEL: udiv_v16i16:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT: vle16.v v8, (a0)
; LMULMAX2-NEXT: vle16.v v10, (a1)
; LMULMAX2-NEXT: vdivu.vv v8, v8, v10
; LMULMAX2-NEXT: vse16.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: udiv_v16i16:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT: vdivu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vdivu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: udiv_v16i16:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT: vdivu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vdivu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <16 x i16>, ptr %x
%b = load <16 x i16>, ptr %y
%c = udiv <16 x i16> %a, %b
store <16 x i16> %c, ptr %x
ret void
}

define void @udiv_v8i32(ptr %x, ptr %y) {
; LMULMAX2-LABEL: udiv_v8i32:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT: vle32.v v8, (a0)
; LMULMAX2-NEXT: vle32.v v10, (a1)
; LMULMAX2-NEXT: vdivu.vv v8, v8, v10
; LMULMAX2-NEXT: vse32.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: udiv_v8i32:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT: vdivu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vdivu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: udiv_v8i32:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT: vdivu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vdivu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <8 x i32>, ptr %x
%b = load <8 x i32>, ptr %y
%c = udiv <8 x i32> %a, %b
store <8 x i32> %c, ptr %x
ret void
}

define void @udiv_v4i64(ptr %x, ptr %y) {
; LMULMAX2-LABEL: udiv_v4i64:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT: vle64.v v8, (a0)
; LMULMAX2-NEXT: vle64.v v10, (a1)
; LMULMAX2-NEXT: vdivu.vv v8, v8, v10
; LMULMAX2-NEXT: vse64.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: udiv_v4i64:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle64.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle64.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle64.v v11, (a1)
; LMULMAX1-RV32-NEXT: vdivu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vdivu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse64.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: udiv_v4i64:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle64.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle64.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle64.v v11, (a1)
; LMULMAX1-RV64-NEXT: vdivu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vdivu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <4 x i64>, ptr %x
%b = load <4 x i64>, ptr %y
%c = udiv <4 x i64> %a, %b
store <4 x i64> %c, ptr %x
ret void
}

define void @urem_v32i8(ptr %x, ptr %y) {
; LMULMAX2-LABEL: urem_v32i8:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: li a2, 32
; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma
; LMULMAX2-NEXT: vle8.v v8, (a0)
; LMULMAX2-NEXT: vle8.v v10, (a1)
; LMULMAX2-NEXT: vremu.vv v8, v8, v10
; LMULMAX2-NEXT: vse8.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: urem_v32i8:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle8.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle8.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle8.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle8.v v11, (a1)
; LMULMAX1-RV32-NEXT: vremu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vremu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse8.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse8.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: urem_v32i8:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle8.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle8.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle8.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle8.v v11, (a1)
; LMULMAX1-RV64-NEXT: vremu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vremu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <32 x i8>, ptr %x
%b = load <32 x i8>, ptr %y
%c = urem <32 x i8> %a, %b
store <32 x i8> %c, ptr %x
ret void
}

define void @urem_v16i16(ptr %x, ptr %y) {
; LMULMAX2-LABEL: urem_v16i16:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT: vle16.v v8, (a0)
; LMULMAX2-NEXT: vle16.v v10, (a1)
; LMULMAX2-NEXT: vremu.vv v8, v8, v10
; LMULMAX2-NEXT: vse16.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: urem_v16i16:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT: vremu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vremu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: urem_v16i16:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT: vremu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vremu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <16 x i16>, ptr %x
%b = load <16 x i16>, ptr %y
%c = urem <16 x i16> %a, %b
store <16 x i16> %c, ptr %x
ret void
}

define void @urem_v8i32(ptr %x, ptr %y) {
; LMULMAX2-LABEL: urem_v8i32:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT: vle32.v v8, (a0)
; LMULMAX2-NEXT: vle32.v v10, (a1)
; LMULMAX2-NEXT: vremu.vv v8, v8, v10
; LMULMAX2-NEXT: vse32.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: urem_v8i32:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT: vremu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vremu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: urem_v8i32:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT: vremu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vremu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <8 x i32>, ptr %x
%b = load <8 x i32>, ptr %y
%c = urem <8 x i32> %a, %b
store <8 x i32> %c, ptr %x
ret void
}

define void @urem_v4i64(ptr %x, ptr %y) {
; LMULMAX2-LABEL: urem_v4i64:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT: vle64.v v8, (a0)
; LMULMAX2-NEXT: vle64.v v10, (a1)
; LMULMAX2-NEXT: vremu.vv v8, v8, v10
; LMULMAX2-NEXT: vse64.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: urem_v4i64:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle64.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle64.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle64.v v11, (a1)
; LMULMAX1-RV32-NEXT: vremu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vremu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse64.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: urem_v4i64:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle64.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle64.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle64.v v11, (a1)
; LMULMAX1-RV64-NEXT: vremu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vremu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <4 x i64>, ptr %x
%b = load <4 x i64>, ptr %y
%c = urem <4 x i64> %a, %b
store <4 x i64> %c, ptr %x
ret void
}

define void @extract_v4i64(ptr %x, ptr %y) {
; LMULMAX2-LABEL: extract_v4i64:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT: vle64.v v8, (a0)
; LMULMAX2-NEXT: vle64.v v10, (a1)
; LMULMAX2-NEXT: vadd.vv v8, v8, v10
; LMULMAX2-NEXT: vse64.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: extract_v4i64:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-NEXT: vle64.v v8, (a0)
; LMULMAX1-NEXT: addi a2, a0, 16
; LMULMAX1-NEXT: vle64.v v9, (a2)
; LMULMAX1-NEXT: vle64.v v10, (a1)
; LMULMAX1-NEXT: addi a1, a1, 16
; LMULMAX1-NEXT: vle64.v v11, (a1)
; LMULMAX1-NEXT: vadd.vv v9, v9, v11
; LMULMAX1-NEXT: vadd.vv v8, v8, v10
; LMULMAX1-NEXT: vse64.v v8, (a0)
; LMULMAX1-NEXT: vse64.v v9, (a2)
; LMULMAX1-NEXT: ret
%a = load <4 x i64>, ptr %x
%b = load <4 x i64>, ptr %y
%c = add <4 x i64> %a, %b
store <4 x i64> %c, ptr %x
ret void
}

define void @mulhu_v32i8(ptr %x) {
; LMULMAX2-LABEL: mulhu_v32i8:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: li a1, 32
; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; LMULMAX2-NEXT: vle8.v v8, (a0)
; LMULMAX2-NEXT: vmv.v.i v10, 0
; LMULMAX2-NEXT: lui a1, 163907
; LMULMAX2-NEXT: addi a1, a1, -2044
; LMULMAX2-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; LMULMAX2-NEXT: vmv.s.x v0, a1
; LMULMAX2-NEXT: li a1, -128
; LMULMAX2-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; LMULMAX2-NEXT: vmerge.vxm v12, v10, a1, v0
; LMULMAX2-NEXT: lui a1, 66049
; LMULMAX2-NEXT: addi a1, a1, 32
; LMULMAX2-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; LMULMAX2-NEXT: vmv.s.x v0, a1
; LMULMAX2-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; LMULMAX2-NEXT: lui a1, %hi(.LCPI181_0)
; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI181_0)
; LMULMAX2-NEXT: vle8.v v14, (a1)
; LMULMAX2-NEXT: vmerge.vim v10, v10, 1, v0
; LMULMAX2-NEXT: vsrl.vv v10, v8, v10
; LMULMAX2-NEXT: vmulhu.vv v10, v10, v14
; LMULMAX2-NEXT: vsub.vv v8, v8, v10
; LMULMAX2-NEXT: vmulhu.vv v8, v8, v12
; LMULMAX2-NEXT: vadd.vv v8, v8, v10
; LMULMAX2-NEXT: vmv.v.i v10, 4
; LMULMAX2-NEXT: lui a1, 8208
; LMULMAX2-NEXT: addi a1, a1, 513
; LMULMAX2-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; LMULMAX2-NEXT: vmv.s.x v0, a1
; LMULMAX2-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; LMULMAX2-NEXT: vmerge.vim v10, v10, 1, v0
; LMULMAX2-NEXT: lui a1, 66785
; LMULMAX2-NEXT: addi a1, a1, 78
; LMULMAX2-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; LMULMAX2-NEXT: vmv.s.x v0, a1
; LMULMAX2-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; LMULMAX2-NEXT: vmerge.vim v10, v10, 3, v0
; LMULMAX2-NEXT: lui a1, 529160
; LMULMAX2-NEXT: addi a1, a1, 304
; LMULMAX2-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; LMULMAX2-NEXT: vmv.s.x v0, a1
; LMULMAX2-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; LMULMAX2-NEXT: vmerge.vim v10, v10, 2, v0
; LMULMAX2-NEXT: vsrl.vv v8, v8, v10
; LMULMAX2-NEXT: vse8.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: mulhu_v32i8:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vle8.v v8, (a1)
; LMULMAX1-NEXT: lui a2, %hi(.LCPI181_0)
; LMULMAX1-NEXT: addi a2, a2, %lo(.LCPI181_0)
; LMULMAX1-NEXT: vle8.v v9, (a2)
; LMULMAX1-NEXT: vle8.v v10, (a0)
; LMULMAX1-NEXT: vdivu.vv v8, v8, v9
; LMULMAX1-NEXT: vdivu.vv v9, v10, v9
; LMULMAX1-NEXT: vse8.v v9, (a0)
; LMULMAX1-NEXT: vse8.v v8, (a1)
; LMULMAX1-NEXT: ret
%a = load <32 x i8>, ptr %x
%b = udiv <32 x i8> %a, <i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25>
store <32 x i8> %b, ptr %x
ret void
}

define void @mulhu_v16i16(ptr %x) {
; LMULMAX2-RV32-LABEL: mulhu_v16i16:
; LMULMAX2-RV32: # %bb.0:
; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-RV32-NEXT: vle16.v v10, (a0)
; LMULMAX2-RV32-NEXT: li a1, 257
; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1
; LMULMAX2-RV32-NEXT: vmv.v.i v8, 0
; LMULMAX2-RV32-NEXT: lui a1, 1048568
; LMULMAX2-RV32-NEXT: vmerge.vxm v12, v8, a1, v0
; LMULMAX2-RV32-NEXT: lui a1, 4
; LMULMAX2-RV32-NEXT: addi a1, a1, 64
; LMULMAX2-RV32-NEXT: vmv.s.x v8, a1
; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; LMULMAX2-RV32-NEXT: vmv.v.i v9, 0
; LMULMAX2-RV32-NEXT: vmv1r.v v0, v8
; LMULMAX2-RV32-NEXT: vmerge.vim v9, v9, 1, v0
; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI182_0)
; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI182_0)
; LMULMAX2-RV32-NEXT: vle16.v v14, (a1)
; LMULMAX2-RV32-NEXT: vsext.vf2 v16, v9
; LMULMAX2-RV32-NEXT: vsrl.vv v16, v10, v16
; LMULMAX2-RV32-NEXT: vmulhu.vv v14, v16, v14
; LMULMAX2-RV32-NEXT: vsub.vv v10, v10, v14
; LMULMAX2-RV32-NEXT: vmulhu.vv v10, v10, v12
; LMULMAX2-RV32-NEXT: vadd.vv v10, v10, v14
; LMULMAX2-RV32-NEXT: lui a1, 2
; LMULMAX2-RV32-NEXT: addi a1, a1, 289
; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1
; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; LMULMAX2-RV32-NEXT: vmv.v.i v9, 3
; LMULMAX2-RV32-NEXT: vmerge.vim v9, v9, 2, v0
; LMULMAX2-RV32-NEXT: vmv1r.v v0, v8
; LMULMAX2-RV32-NEXT: vmerge.vim v8, v9, 1, v0
; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; LMULMAX2-RV32-NEXT: vsext.vf2 v12, v8
; LMULMAX2-RV32-NEXT: vsrl.vv v8, v10, v12
; LMULMAX2-RV32-NEXT: vse16.v v8, (a0)
; LMULMAX2-RV32-NEXT: ret
;
; LMULMAX2-RV64-LABEL: mulhu_v16i16:
; LMULMAX2-RV64: # %bb.0:
; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-RV64-NEXT: vle16.v v8, (a0)
; LMULMAX2-RV64-NEXT: li a1, 257
; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1
; LMULMAX2-RV64-NEXT: vmv.v.i v10, 0
; LMULMAX2-RV64-NEXT: lui a1, 1048568
; LMULMAX2-RV64-NEXT: vmerge.vxm v10, v10, a1, v0
; LMULMAX2-RV64-NEXT: li a1, 1
; LMULMAX2-RV64-NEXT: slli a1, a1, 48
; LMULMAX2-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX2-RV64-NEXT: vmv.v.x v12, a1
; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI182_0)
; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI182_0)
; LMULMAX2-RV64-NEXT: vle16.v v14, (a1)
; LMULMAX2-RV64-NEXT: vsext.vf2 v16, v12
; LMULMAX2-RV64-NEXT: vsrl.vv v12, v8, v16
; LMULMAX2-RV64-NEXT: vmulhu.vv v12, v12, v14
; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v12
; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v10
; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v12
; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI182_1)
; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI182_1)
; LMULMAX2-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX2-RV64-NEXT: vlse64.v v10, (a1), zero
; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-RV64-NEXT: vsext.vf2 v12, v10
; LMULMAX2-RV64-NEXT: vsrl.vv v8, v8, v12
; LMULMAX2-RV64-NEXT: vse16.v v8, (a0)
; LMULMAX2-RV64-NEXT: ret
;
; LMULMAX1-LABEL: mulhu_v16i16:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vle16.v v8, (a1)
; LMULMAX1-NEXT: lui a2, %hi(.LCPI182_0)
; LMULMAX1-NEXT: addi a2, a2, %lo(.LCPI182_0)
; LMULMAX1-NEXT: vle16.v v9, (a2)
; LMULMAX1-NEXT: vle16.v v10, (a0)
; LMULMAX1-NEXT: vdivu.vv v8, v8, v9
; LMULMAX1-NEXT: vdivu.vv v9, v10, v9
; LMULMAX1-NEXT: vse16.v v9, (a0)
; LMULMAX1-NEXT: vse16.v v8, (a1)
; LMULMAX1-NEXT: ret
%a = load <16 x i16>, ptr %x
%b = udiv <16 x i16> %a, <i16 7, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 7, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
store <16 x i16> %b, ptr %x
ret void
}

define void @mulhu_v8i32(ptr %x) {
; LMULMAX2-LABEL: mulhu_v8i32:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT: vle32.v v8, (a0)
; LMULMAX2-NEXT: li a1, 68
; LMULMAX2-NEXT: vmv.s.x v0, a1
; LMULMAX2-NEXT: lui a1, %hi(.LCPI183_0)
; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI183_0)
; LMULMAX2-NEXT: vle32.v v10, (a1)
; LMULMAX2-NEXT: vmv.v.i v12, 0
; LMULMAX2-NEXT: lui a1, 524288
; LMULMAX2-NEXT: vmerge.vxm v12, v12, a1, v0
; LMULMAX2-NEXT: vmulhu.vv v10, v8, v10
; LMULMAX2-NEXT: vsub.vv v8, v8, v10
; LMULMAX2-NEXT: vmulhu.vv v8, v8, v12
; LMULMAX2-NEXT: vadd.vv v8, v8, v10
; LMULMAX2-NEXT: lui a1, 4128
; LMULMAX2-NEXT: addi a1, a1, 514
; LMULMAX2-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; LMULMAX2-NEXT: vmv.v.x v10, a1
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT: vsext.vf4 v12, v10
; LMULMAX2-NEXT: vsrl.vv v8, v8, v12
; LMULMAX2-NEXT: vse32.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: mulhu_v8i32:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a1, a0, 16
; LMULMAX1-RV32-NEXT: vle32.v v9, (a1)
; LMULMAX1-RV32-NEXT: lui a2, 524288
; LMULMAX1-RV32-NEXT: vmv.s.x v10, a2
; LMULMAX1-RV32-NEXT: vmv.v.i v11, 0
; LMULMAX1-RV32-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; LMULMAX1-RV32-NEXT: vslideup.vi v11, v10, 2
; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI183_0)
; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI183_0)
; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle32.v v10, (a2)
; LMULMAX1-RV32-NEXT: vmulhu.vv v12, v9, v10
; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v12
; LMULMAX1-RV32-NEXT: vmulhu.vv v9, v9, v11
; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v12
; LMULMAX1-RV32-NEXT: lui a2, 4128
; LMULMAX1-RV32-NEXT: addi a2, a2, 514
; LMULMAX1-RV32-NEXT: vmv.s.x v12, a2
; LMULMAX1-RV32-NEXT: vsext.vf4 v13, v12
; LMULMAX1-RV32-NEXT: vsrl.vv v9, v9, v13
; LMULMAX1-RV32-NEXT: vmulhu.vv v10, v8, v10
; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v10
; LMULMAX1-RV32-NEXT: vmulhu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v10
; LMULMAX1-RV32-NEXT: vsrl.vv v8, v8, v13
; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse32.v v9, (a1)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: mulhu_v8i32:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a1, a0, 16
; LMULMAX1-RV64-NEXT: vle32.v v9, (a1)
; LMULMAX1-RV64-NEXT: lui a2, 36976
; LMULMAX1-RV64-NEXT: addi a2, a2, 1541
; LMULMAX1-RV64-NEXT: vmv.s.x v10, a2
; LMULMAX1-RV64-NEXT: vsext.vf4 v11, v10
; LMULMAX1-RV64-NEXT: vdivu.vv v9, v9, v11
; LMULMAX1-RV64-NEXT: vdivu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse32.v v9, (a1)
; LMULMAX1-RV64-NEXT: ret
%a = load <8 x i32>, ptr %x
%b = udiv <8 x i32> %a, <i32 5, i32 6, i32 7, i32 9, i32 5, i32 6, i32 7, i32 9>
store <8 x i32> %b, ptr %x
ret void
}

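; For i64 elements the magic constants no longer fit a single lui/addi pair:
; LMULMAX2-RV32 loads them from the constant pool as e32 pairs, LMULMAX2-RV64
; splices 1<<63 into a zero vector with vslideup and loads the multipliers
; from the pool, and LMULMAX1-RV32 is expected to give up and use vdivu.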
define void @mulhu_v4i64(ptr %x) {
; LMULMAX2-RV32-LABEL: mulhu_v4i64:
; LMULMAX2-RV32: # %bb.0:
; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV32-NEXT: vle64.v v8, (a0)
; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI184_0)
; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI184_0)
; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-RV32-NEXT: vle32.v v10, (a1)
; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV32-NEXT: vmulhu.vv v10, v8, v10
; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10
; LMULMAX2-RV32-NEXT: lui a1, 524288
; LMULMAX2-RV32-NEXT: vmv.s.x v12, a1
; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-RV32-NEXT: vmv.v.i v14, 0
; LMULMAX2-RV32-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; LMULMAX2-RV32-NEXT: vslideup.vi v14, v12, 5
; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV32-NEXT: vmulhu.vv v8, v8, v14
; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10
; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI184_1)
; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI184_1)
; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-RV32-NEXT: vle8.v v10, (a1)
; LMULMAX2-RV32-NEXT: vsext.vf4 v12, v10
; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV32-NEXT: vsrl.vv v8, v8, v12
; LMULMAX2-RV32-NEXT: vse64.v v8, (a0)
; LMULMAX2-RV32-NEXT: ret
;
; LMULMAX2-RV64-LABEL: mulhu_v4i64:
; LMULMAX2-RV64: # %bb.0:
; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV64-NEXT: vle64.v v8, (a0)
; LMULMAX2-RV64-NEXT: li a1, -1
; LMULMAX2-RV64-NEXT: slli a1, a1, 63
; LMULMAX2-RV64-NEXT: vmv.s.x v10, a1
; LMULMAX2-RV64-NEXT: vmv.v.i v12, 0
; LMULMAX2-RV64-NEXT: vsetivli zero, 3, e64, m2, tu, ma
; LMULMAX2-RV64-NEXT: vslideup.vi v12, v10, 2
; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI184_0)
; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI184_0)
; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV64-NEXT: vle64.v v10, (a1)
; LMULMAX2-RV64-NEXT: vmulhu.vv v10, v8, v10
; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10
; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v12
; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10
; LMULMAX2-RV64-NEXT: lui a1, 12320
; LMULMAX2-RV64-NEXT: addi a1, a1, 513
; LMULMAX2-RV64-NEXT: vmv.s.x v10, a1
; LMULMAX2-RV64-NEXT: vsext.vf8 v12, v10
; LMULMAX2-RV64-NEXT: vsrl.vv v8, v8, v12
; LMULMAX2-RV64-NEXT: vse64.v v8, (a0)
; LMULMAX2-RV64-NEXT: ret
;
; LMULMAX1-RV32-LABEL: mulhu_v4i64:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a1, a0, 16
; LMULMAX1-RV32-NEXT: vle64.v v9, (a1)
; LMULMAX1-RV32-NEXT: lui a2, 144
; LMULMAX1-RV32-NEXT: addi a2, a2, 7
; LMULMAX1-RV32-NEXT: vmv.s.x v10, a2
; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT: vsext.vf4 v11, v10
; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT: vdivu.vv v9, v9, v11
; LMULMAX1-RV32-NEXT: lui a2, 80
; LMULMAX1-RV32-NEXT: addi a2, a2, 3
; LMULMAX1-RV32-NEXT: vmv.s.x v10, a2
; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT: vsext.vf4 v11, v10
; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT: vdivu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse64.v v9, (a1)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: mulhu_v4i64:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a1, a0, 16
; LMULMAX1-RV64-NEXT: vle64.v v9, (a1)
; LMULMAX1-RV64-NEXT: vmv.v.i v10, 0
; LMULMAX1-RV64-NEXT: li a2, -1
; LMULMAX1-RV64-NEXT: slli a2, a2, 63
; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma
; LMULMAX1-RV64-NEXT: vmv.s.x v10, a2
; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI184_0)
; LMULMAX1-RV64-NEXT: addi a2, a2, %lo(.LCPI184_0)
; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vlse64.v v11, (a2), zero
; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI184_1)
; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI184_1)(a2)
; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma
; LMULMAX1-RV64-NEXT: vmv.s.x v11, a2
; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vmulhu.vv v11, v9, v11
; LMULMAX1-RV64-NEXT: vsub.vv v9, v9, v11
; LMULMAX1-RV64-NEXT: vmulhu.vv v9, v9, v10
; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v11
; LMULMAX1-RV64-NEXT: vid.v v10
; LMULMAX1-RV64-NEXT: vadd.vi v11, v10, 2
; LMULMAX1-RV64-NEXT: vsrl.vv v9, v9, v11
; LMULMAX1-RV64-NEXT: lui a2, 838861
; LMULMAX1-RV64-NEXT: addiw a2, a2, -819
; LMULMAX1-RV64-NEXT: slli a3, a2, 32
; LMULMAX1-RV64-NEXT: add a2, a2, a3
; LMULMAX1-RV64-NEXT: vmv.v.x v11, a2
; LMULMAX1-RV64-NEXT: lui a2, 699051
; LMULMAX1-RV64-NEXT: addiw a2, a2, -1365
; LMULMAX1-RV64-NEXT: slli a3, a2, 32
; LMULMAX1-RV64-NEXT: add a2, a2, a3
; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma
; LMULMAX1-RV64-NEXT: vmv.s.x v11, a2
; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vmulhu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vadd.vi v10, v10, 1
; LMULMAX1-RV64-NEXT: vsrl.vv v8, v8, v10
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse64.v v9, (a1)
; LMULMAX1-RV64-NEXT: ret
%a = load <4 x i64>, ptr %x
%b = udiv <4 x i64> %a, <i64 3, i64 5, i64 7, i64 9>
store <4 x i64> %b, ptr %x
ret void
}

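; Note: despite the mulhs_ name, the next test is an unsigned division; the
; divisors mix 9 and -9 (247 as u8). LMULMAX2 is expected to merge the two
; magic factors (57 and -123) and the two shift amounts (1 and 7) under one
; mask before a single vmulhu, while LMULMAX1 keeps a vdivu.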
define void @mulhs_v32i8(ptr %x) {
; LMULMAX2-LABEL: mulhs_v32i8:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: li a1, 32
; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; LMULMAX2-NEXT: vle8.v v8, (a0)
; LMULMAX2-NEXT: vmv.v.i v10, 7
; LMULMAX2-NEXT: lui a1, 304453
; LMULMAX2-NEXT: addi a1, a1, -1452
; LMULMAX2-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; LMULMAX2-NEXT: vmv.s.x v0, a1
; LMULMAX2-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; LMULMAX2-NEXT: vmerge.vim v10, v10, 1, v0
; LMULMAX2-NEXT: li a1, -123
; LMULMAX2-NEXT: vmv.v.x v12, a1
; LMULMAX2-NEXT: li a1, 57
; LMULMAX2-NEXT: vmerge.vxm v12, v12, a1, v0
; LMULMAX2-NEXT: vmulhu.vv v8, v8, v12
; LMULMAX2-NEXT: vsrl.vv v8, v8, v10
; LMULMAX2-NEXT: vse8.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: mulhs_v32i8:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX1-NEXT: vle8.v v8, (a0)
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vle8.v v9, (a1)
; LMULMAX1-NEXT: lui a2, 5
; LMULMAX1-NEXT: addi a2, a2, -1452
; LMULMAX1-NEXT: vmv.s.x v0, a2
; LMULMAX1-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; LMULMAX1-NEXT: vmv.v.i v10, -9
; LMULMAX1-NEXT: vmerge.vim v10, v10, 9, v0
; LMULMAX1-NEXT: vdivu.vv v9, v9, v10
; LMULMAX1-NEXT: vdivu.vv v8, v8, v10
; LMULMAX1-NEXT: vse8.v v8, (a0)
; LMULMAX1-NEXT: vse8.v v9, (a1)
; LMULMAX1-NEXT: ret
%a = load <32 x i8>, ptr %x
%b = udiv <32 x i8> %a, <i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9, i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9>
store <32 x i8> %b, ptr %x
ret void
}

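; Signed division by a +/-7 splat: LMULMAX2 is expected to merge the positive
; and negative magic factors under a mask and use vmulh plus the usual
; arithmetic-shift and sign-bit fixup, while LMULMAX1 stays on vdiv.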
define void @mulhs_v16i16(ptr %x) {
; LMULMAX2-LABEL: mulhs_v16i16:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT: vle16.v v8, (a0)
; LMULMAX2-NEXT: lui a1, 5
; LMULMAX2-NEXT: addi a1, a1, -1755
; LMULMAX2-NEXT: vmv.v.x v10, a1
; LMULMAX2-NEXT: lui a1, 7
; LMULMAX2-NEXT: addi a1, a1, -1687
; LMULMAX2-NEXT: vmv.s.x v0, a1
; LMULMAX2-NEXT: lui a1, 1048571
; LMULMAX2-NEXT: addi a1, a1, 1755
; LMULMAX2-NEXT: vmerge.vxm v10, v10, a1, v0
; LMULMAX2-NEXT: vmulh.vv v8, v8, v10
; LMULMAX2-NEXT: vsra.vi v8, v8, 1
; LMULMAX2-NEXT: vsrl.vi v10, v8, 15
; LMULMAX2-NEXT: vadd.vv v8, v8, v10
; LMULMAX2-NEXT: vse16.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: mulhs_v16i16:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-NEXT: vle16.v v8, (a0)
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vle16.v v9, (a1)
; LMULMAX1-NEXT: li a2, 105
; LMULMAX1-NEXT: vmv.s.x v0, a2
; LMULMAX1-NEXT: vmv.v.i v10, 7
; LMULMAX1-NEXT: vmerge.vim v10, v10, -7, v0
; LMULMAX1-NEXT: vdiv.vv v9, v9, v10
; LMULMAX1-NEXT: vdiv.vv v8, v8, v10
; LMULMAX1-NEXT: vse16.v v8, (a0)
; LMULMAX1-NEXT: vse16.v v9, (a1)
; LMULMAX1-NEXT: ret
%a = load <16 x i16>, ptr %x
%b = sdiv <16 x i16> %a, <i16 -7, i16 7, i16 7, i16 -7, i16 7, i16 -7, i16 -7, i16 7, i16 -7, i16 7, i16 7, i16 -7, i16 7, i16 -7, i16 -7, i16 7>
store <16 x i16> %b, ptr %x
ret void
}

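; sdiv by +/-5. 0x66666667 (lui 419430 + addi 1639) is the classic
; multiply-high magic for dividing by 5; its negation is merged in for the
; -5 lanes before the vmulh/vsra/vsrl fixup.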
define void @mulhs_v8i32(ptr %x) {
; LMULMAX2-RV32-LABEL: mulhs_v8i32:
; LMULMAX2-RV32: # %bb.0:
; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-RV32-NEXT: vle32.v v8, (a0)
; LMULMAX2-RV32-NEXT: lui a1, 419430
; LMULMAX2-RV32-NEXT: addi a1, a1, 1639
; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1
; LMULMAX2-RV32-NEXT: li a1, 85
; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1
; LMULMAX2-RV32-NEXT: lui a1, 629146
; LMULMAX2-RV32-NEXT: addi a1, a1, -1639
; LMULMAX2-RV32-NEXT: vmerge.vxm v10, v10, a1, v0
; LMULMAX2-RV32-NEXT: vmulh.vv v8, v8, v10
; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 31
; LMULMAX2-RV32-NEXT: vsra.vi v8, v8, 1
; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10
; LMULMAX2-RV32-NEXT: vse32.v v8, (a0)
; LMULMAX2-RV32-NEXT: ret
;
; LMULMAX2-RV64-LABEL: mulhs_v8i32:
; LMULMAX2-RV64: # %bb.0:
; LMULMAX2-RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-RV64-NEXT: vle32.v v8, (a0)
; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI187_0)
; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI187_0)
; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV64-NEXT: vlse64.v v10, (a1), zero
; LMULMAX2-RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-RV64-NEXT: vmulh.vv v8, v8, v10
; LMULMAX2-RV64-NEXT: vsra.vi v8, v8, 1
; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 31
; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10
; LMULMAX2-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX2-RV64-NEXT: ret
;
; LMULMAX1-RV32-LABEL: mulhs_v8i32:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a1, a0, 16
; LMULMAX1-RV32-NEXT: vle32.v v9, (a1)
; LMULMAX1-RV32-NEXT: lui a2, 419430
; LMULMAX1-RV32-NEXT: addi a2, a2, 1639
; LMULMAX1-RV32-NEXT: vmv.v.x v10, a2
; LMULMAX1-RV32-NEXT: vmv.v.i v0, 5
; LMULMAX1-RV32-NEXT: lui a2, 629146
; LMULMAX1-RV32-NEXT: addi a2, a2, -1639
; LMULMAX1-RV32-NEXT: vmerge.vxm v10, v10, a2, v0
; LMULMAX1-RV32-NEXT: vmulh.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vsrl.vi v11, v9, 31
; LMULMAX1-RV32-NEXT: vsra.vi v9, v9, 1
; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v11
; LMULMAX1-RV32-NEXT: vmulh.vv v8, v8, v10
; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 31
; LMULMAX1-RV32-NEXT: vsra.vi v8, v8, 1
; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v10
; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse32.v v9, (a1)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: mulhs_v8i32:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a1, a0, 16
; LMULMAX1-RV64-NEXT: vle32.v v9, (a1)
; LMULMAX1-RV64-NEXT: li a2, 3
; LMULMAX1-RV64-NEXT: slli a2, a2, 33
; LMULMAX1-RV64-NEXT: addi a2, a2, -5
; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vmv.v.x v10, a2
; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vdiv.vv v9, v9, v10
; LMULMAX1-RV64-NEXT: vdiv.vv v8, v8, v10
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse32.v v9, (a1)
; LMULMAX1-RV64-NEXT: ret
%a = load <8 x i32>, ptr %x
%b = sdiv <8 x i32> %a, <i32 -5, i32 5, i32 -5, i32 5, i32 -5, i32 5, i32 -5, i32 5>
store <8 x i32> %b, ptr %x
ret void
}

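; sdiv of i64 elements by +/-3. On RV64 the 0x5555... pattern is built with
; addiw/slli/add and the remaining constant comes from the constant pool;
; RV32 has to assemble the same values from 32-bit halves, and LMULMAX1-RV32
; is expected to fall back to vdiv.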
define void @mulhs_v4i64(ptr %x) {
; LMULMAX2-RV32-LABEL: mulhs_v4i64:
; LMULMAX2-RV32: # %bb.0:
; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV32-NEXT: vle64.v v8, (a0)
; LMULMAX2-RV32-NEXT: lui a1, 349525
; LMULMAX2-RV32-NEXT: addi a2, a1, 1365
; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-RV32-NEXT: vmv.v.x v10, a2
; LMULMAX2-RV32-NEXT: li a2, 17
; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2
; LMULMAX2-RV32-NEXT: addi a1, a1, 1366
; LMULMAX2-RV32-NEXT: vmerge.vxm v10, v10, a1, v0
; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV32-NEXT: vmulh.vv v10, v8, v10
; LMULMAX2-RV32-NEXT: lui a1, 1048560
; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; LMULMAX2-RV32-NEXT: vmv.v.x v12, a1
; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-RV32-NEXT: vsext.vf4 v14, v12
; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV32-NEXT: vmadd.vv v14, v8, v10
; LMULMAX2-RV32-NEXT: li a1, 63
; LMULMAX2-RV32-NEXT: vsrl.vx v8, v14, a1
; LMULMAX2-RV32-NEXT: lui a1, 16
; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1
; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-RV32-NEXT: vsext.vf4 v12, v10
; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV32-NEXT: vsra.vv v10, v14, v12
; LMULMAX2-RV32-NEXT: vadd.vv v8, v10, v8
; LMULMAX2-RV32-NEXT: vse64.v v8, (a0)
; LMULMAX2-RV32-NEXT: ret
;
; LMULMAX2-RV64-LABEL: mulhs_v4i64:
; LMULMAX2-RV64: # %bb.0:
; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV64-NEXT: vle64.v v8, (a0)
; LMULMAX2-RV64-NEXT: lui a1, 349525
; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365
; LMULMAX2-RV64-NEXT: slli a2, a1, 32
; LMULMAX2-RV64-NEXT: add a1, a1, a2
; LMULMAX2-RV64-NEXT: vmv.v.x v10, a1
; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI188_0)
; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI188_0)(a1)
; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; LMULMAX2-RV64-NEXT: vmv.v.i v0, 5
; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV64-NEXT: vmerge.vxm v10, v10, a1, v0
; LMULMAX2-RV64-NEXT: vmulh.vv v10, v8, v10
; LMULMAX2-RV64-NEXT: lui a1, 1044496
; LMULMAX2-RV64-NEXT: addi a1, a1, -256
; LMULMAX2-RV64-NEXT: vmv.s.x v12, a1
; LMULMAX2-RV64-NEXT: vsext.vf8 v14, v12
; LMULMAX2-RV64-NEXT: vmadd.vv v14, v8, v10
; LMULMAX2-RV64-NEXT: li a1, 63
; LMULMAX2-RV64-NEXT: vsrl.vx v8, v14, a1
; LMULMAX2-RV64-NEXT: lui a1, 4096
; LMULMAX2-RV64-NEXT: addi a1, a1, 256
; LMULMAX2-RV64-NEXT: vmv.s.x v10, a1
; LMULMAX2-RV64-NEXT: vsext.vf8 v12, v10
; LMULMAX2-RV64-NEXT: vsra.vv v10, v14, v12
; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8
; LMULMAX2-RV64-NEXT: vse64.v v8, (a0)
; LMULMAX2-RV64-NEXT: ret
;
; LMULMAX1-RV32-LABEL: mulhs_v4i64:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a1, a0, 16
; LMULMAX1-RV32-NEXT: vle64.v v9, (a1)
; LMULMAX1-RV32-NEXT: lui a2, 1048528
; LMULMAX1-RV32-NEXT: addi a2, a2, 3
; LMULMAX1-RV32-NEXT: vmv.s.x v10, a2
; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT: vsext.vf4 v11, v10
; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT: vdiv.vv v9, v9, v11
; LMULMAX1-RV32-NEXT: vdiv.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse64.v v9, (a1)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: mulhs_v4i64:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a1, a0, 16
; LMULMAX1-RV64-NEXT: lui a2, 349525
; LMULMAX1-RV64-NEXT: addiw a2, a2, 1365
; LMULMAX1-RV64-NEXT: slli a3, a2, 32
; LMULMAX1-RV64-NEXT: add a2, a2, a3
; LMULMAX1-RV64-NEXT: lui a3, %hi(.LCPI188_0)
; LMULMAX1-RV64-NEXT: ld a3, %lo(.LCPI188_0)(a3)
; LMULMAX1-RV64-NEXT: vle64.v v9, (a1)
; LMULMAX1-RV64-NEXT: vmv.v.x v10, a2
; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma
; LMULMAX1-RV64-NEXT: vmv.s.x v10, a3
; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vmulh.vv v11, v9, v10
; LMULMAX1-RV64-NEXT: vid.v v12
; LMULMAX1-RV64-NEXT: vrsub.vi v13, v12, 0
; LMULMAX1-RV64-NEXT: vmacc.vv v11, v13, v9
; LMULMAX1-RV64-NEXT: li a2, 63
; LMULMAX1-RV64-NEXT: vsrl.vx v9, v11, a2
; LMULMAX1-RV64-NEXT: vsra.vv v11, v11, v12
; LMULMAX1-RV64-NEXT: vadd.vv v9, v11, v9
; LMULMAX1-RV64-NEXT: vmulh.vv v10, v8, v10
; LMULMAX1-RV64-NEXT: vmacc.vv v10, v8, v13
; LMULMAX1-RV64-NEXT: vsrl.vx v8, v10, a2
; LMULMAX1-RV64-NEXT: vsra.vv v10, v10, v12
; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse64.v v9, (a1)
; LMULMAX1-RV64-NEXT: ret
%a = load <4 x i64>, ptr %x
%b = sdiv <4 x i64> %a, <i64 3, i64 -3, i64 3, i64 -3>
store <4 x i64> %b, ptr %x
ret void
}

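; The smin/smax/umin/umax tests below should all match the icmp+select idiom
; directly to a single vmin/vmax/vminu/vmaxu; the LMULMAX1 configurations
; just split each 256-bit vector into two 128-bit halves.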
define void @smin_v32i8(ptr %x, ptr %y) {
; LMULMAX2-LABEL: smin_v32i8:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: li a2, 32
; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma
; LMULMAX2-NEXT: vle8.v v8, (a0)
; LMULMAX2-NEXT: vle8.v v10, (a1)
; LMULMAX2-NEXT: vmin.vv v8, v8, v10
; LMULMAX2-NEXT: vse8.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: smin_v32i8:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle8.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle8.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle8.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle8.v v11, (a1)
; LMULMAX1-RV32-NEXT: vmin.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vmin.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse8.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse8.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: smin_v32i8:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle8.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle8.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle8.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle8.v v11, (a1)
; LMULMAX1-RV64-NEXT: vmin.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vmin.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <32 x i8>, ptr %x
%b = load <32 x i8>, ptr %y
%cc = icmp slt <32 x i8> %a, %b
%c = select <32 x i1> %cc, <32 x i8> %a, <32 x i8> %b
store <32 x i8> %c, ptr %x
ret void
}

define void @smin_v16i16(ptr %x, ptr %y) {
; LMULMAX2-LABEL: smin_v16i16:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT: vle16.v v8, (a0)
; LMULMAX2-NEXT: vle16.v v10, (a1)
; LMULMAX2-NEXT: vmin.vv v8, v8, v10
; LMULMAX2-NEXT: vse16.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: smin_v16i16:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT: vmin.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vmin.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: smin_v16i16:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT: vmin.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vmin.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <16 x i16>, ptr %x
%b = load <16 x i16>, ptr %y
%cc = icmp slt <16 x i16> %a, %b
%c = select <16 x i1> %cc, <16 x i16> %a, <16 x i16> %b
store <16 x i16> %c, ptr %x
ret void
}

define void @smin_v8i32(ptr %x, ptr %y) {
; LMULMAX2-LABEL: smin_v8i32:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT: vle32.v v8, (a0)
; LMULMAX2-NEXT: vle32.v v10, (a1)
; LMULMAX2-NEXT: vmin.vv v8, v8, v10
; LMULMAX2-NEXT: vse32.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: smin_v8i32:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT: vmin.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vmin.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: smin_v8i32:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT: vmin.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vmin.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <8 x i32>, ptr %x
%b = load <8 x i32>, ptr %y
%cc = icmp slt <8 x i32> %a, %b
%c = select <8 x i1> %cc, <8 x i32> %a, <8 x i32> %b
store <8 x i32> %c, ptr %x
ret void
}

define void @smin_v4i64(ptr %x, ptr %y) {
; LMULMAX2-LABEL: smin_v4i64:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT: vle64.v v8, (a0)
; LMULMAX2-NEXT: vle64.v v10, (a1)
; LMULMAX2-NEXT: vmin.vv v8, v8, v10
; LMULMAX2-NEXT: vse64.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: smin_v4i64:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle64.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle64.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle64.v v11, (a1)
; LMULMAX1-RV32-NEXT: vmin.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vmin.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse64.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: smin_v4i64:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle64.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle64.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle64.v v11, (a1)
; LMULMAX1-RV64-NEXT: vmin.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vmin.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <4 x i64>, ptr %x
%b = load <4 x i64>, ptr %y
%cc = icmp slt <4 x i64> %a, %b
%c = select <4 x i1> %cc, <4 x i64> %a, <4 x i64> %b
store <4 x i64> %c, ptr %x
ret void
}

define void @smax_v32i8(ptr %x, ptr %y) {
; LMULMAX2-LABEL: smax_v32i8:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: li a2, 32
; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma
; LMULMAX2-NEXT: vle8.v v8, (a0)
; LMULMAX2-NEXT: vle8.v v10, (a1)
; LMULMAX2-NEXT: vmax.vv v8, v8, v10
; LMULMAX2-NEXT: vse8.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: smax_v32i8:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle8.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle8.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle8.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle8.v v11, (a1)
; LMULMAX1-RV32-NEXT: vmax.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vmax.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse8.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse8.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: smax_v32i8:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle8.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle8.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle8.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle8.v v11, (a1)
; LMULMAX1-RV64-NEXT: vmax.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vmax.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <32 x i8>, ptr %x
%b = load <32 x i8>, ptr %y
%cc = icmp sgt <32 x i8> %a, %b
%c = select <32 x i1> %cc, <32 x i8> %a, <32 x i8> %b
store <32 x i8> %c, ptr %x
ret void
}

define void @smax_v16i16(ptr %x, ptr %y) {
; LMULMAX2-LABEL: smax_v16i16:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT: vle16.v v8, (a0)
; LMULMAX2-NEXT: vle16.v v10, (a1)
; LMULMAX2-NEXT: vmax.vv v8, v8, v10
; LMULMAX2-NEXT: vse16.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: smax_v16i16:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT: vmax.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vmax.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: smax_v16i16:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT: vmax.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vmax.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <16 x i16>, ptr %x
%b = load <16 x i16>, ptr %y
%cc = icmp sgt <16 x i16> %a, %b
%c = select <16 x i1> %cc, <16 x i16> %a, <16 x i16> %b
store <16 x i16> %c, ptr %x
ret void
}

define void @smax_v8i32(ptr %x, ptr %y) {
; LMULMAX2-LABEL: smax_v8i32:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT: vle32.v v8, (a0)
; LMULMAX2-NEXT: vle32.v v10, (a1)
; LMULMAX2-NEXT: vmax.vv v8, v8, v10
; LMULMAX2-NEXT: vse32.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: smax_v8i32:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT: vmax.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vmax.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: smax_v8i32:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT: vmax.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vmax.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <8 x i32>, ptr %x
%b = load <8 x i32>, ptr %y
%cc = icmp sgt <8 x i32> %a, %b
%c = select <8 x i1> %cc, <8 x i32> %a, <8 x i32> %b
store <8 x i32> %c, ptr %x
ret void
}

define void @smax_v4i64(ptr %x, ptr %y) {
; LMULMAX2-LABEL: smax_v4i64:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT: vle64.v v8, (a0)
; LMULMAX2-NEXT: vle64.v v10, (a1)
; LMULMAX2-NEXT: vmax.vv v8, v8, v10
; LMULMAX2-NEXT: vse64.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: smax_v4i64:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle64.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle64.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle64.v v11, (a1)
; LMULMAX1-RV32-NEXT: vmax.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vmax.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse64.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: smax_v4i64:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle64.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle64.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle64.v v11, (a1)
; LMULMAX1-RV64-NEXT: vmax.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vmax.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <4 x i64>, ptr %x
%b = load <4 x i64>, ptr %y
%cc = icmp sgt <4 x i64> %a, %b
%c = select <4 x i1> %cc, <4 x i64> %a, <4 x i64> %b
store <4 x i64> %c, ptr %x
ret void
}

define void @umin_v32i8(ptr %x, ptr %y) {
; LMULMAX2-LABEL: umin_v32i8:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: li a2, 32
; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma
; LMULMAX2-NEXT: vle8.v v8, (a0)
; LMULMAX2-NEXT: vle8.v v10, (a1)
; LMULMAX2-NEXT: vminu.vv v8, v8, v10
; LMULMAX2-NEXT: vse8.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: umin_v32i8:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle8.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle8.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle8.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle8.v v11, (a1)
; LMULMAX1-RV32-NEXT: vminu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vminu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse8.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse8.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: umin_v32i8:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle8.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle8.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle8.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle8.v v11, (a1)
; LMULMAX1-RV64-NEXT: vminu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vminu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <32 x i8>, ptr %x
%b = load <32 x i8>, ptr %y
%cc = icmp ult <32 x i8> %a, %b
%c = select <32 x i1> %cc, <32 x i8> %a, <32 x i8> %b
store <32 x i8> %c, ptr %x
ret void
}

define void @umin_v16i16(ptr %x, ptr %y) {
; LMULMAX2-LABEL: umin_v16i16:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT: vle16.v v8, (a0)
; LMULMAX2-NEXT: vle16.v v10, (a1)
; LMULMAX2-NEXT: vminu.vv v8, v8, v10
; LMULMAX2-NEXT: vse16.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: umin_v16i16:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT: vminu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vminu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: umin_v16i16:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT: vminu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vminu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <16 x i16>, ptr %x
%b = load <16 x i16>, ptr %y
%cc = icmp ult <16 x i16> %a, %b
%c = select <16 x i1> %cc, <16 x i16> %a, <16 x i16> %b
store <16 x i16> %c, ptr %x
ret void
}

define void @umin_v8i32(ptr %x, ptr %y) {
; LMULMAX2-LABEL: umin_v8i32:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT: vle32.v v8, (a0)
; LMULMAX2-NEXT: vle32.v v10, (a1)
; LMULMAX2-NEXT: vminu.vv v8, v8, v10
; LMULMAX2-NEXT: vse32.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: umin_v8i32:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT: vminu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vminu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: umin_v8i32:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT: vminu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vminu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <8 x i32>, ptr %x
%b = load <8 x i32>, ptr %y
%cc = icmp ult <8 x i32> %a, %b
%c = select <8 x i1> %cc, <8 x i32> %a, <8 x i32> %b
store <8 x i32> %c, ptr %x
ret void
}

define void @umin_v4i64(ptr %x, ptr %y) {
; LMULMAX2-LABEL: umin_v4i64:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT: vle64.v v8, (a0)
; LMULMAX2-NEXT: vle64.v v10, (a1)
; LMULMAX2-NEXT: vminu.vv v8, v8, v10
; LMULMAX2-NEXT: vse64.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: umin_v4i64:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle64.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle64.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle64.v v11, (a1)
; LMULMAX1-RV32-NEXT: vminu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vminu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse64.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: umin_v4i64:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle64.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle64.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle64.v v11, (a1)
; LMULMAX1-RV64-NEXT: vminu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vminu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <4 x i64>, ptr %x
%b = load <4 x i64>, ptr %y
%cc = icmp ult <4 x i64> %a, %b
%c = select <4 x i1> %cc, <4 x i64> %a, <4 x i64> %b
store <4 x i64> %c, ptr %x
ret void
}

define void @umax_v32i8(ptr %x, ptr %y) {
; LMULMAX2-LABEL: umax_v32i8:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: li a2, 32
; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma
; LMULMAX2-NEXT: vle8.v v8, (a0)
; LMULMAX2-NEXT: vle8.v v10, (a1)
; LMULMAX2-NEXT: vmaxu.vv v8, v8, v10
; LMULMAX2-NEXT: vse8.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: umax_v32i8:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle8.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle8.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle8.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle8.v v11, (a1)
; LMULMAX1-RV32-NEXT: vmaxu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vmaxu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse8.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse8.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: umax_v32i8:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle8.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle8.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle8.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle8.v v11, (a1)
; LMULMAX1-RV64-NEXT: vmaxu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vmaxu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <32 x i8>, ptr %x
%b = load <32 x i8>, ptr %y
%cc = icmp ugt <32 x i8> %a, %b
%c = select <32 x i1> %cc, <32 x i8> %a, <32 x i8> %b
store <32 x i8> %c, ptr %x
ret void
}

define void @umax_v16i16(ptr %x, ptr %y) {
; LMULMAX2-LABEL: umax_v16i16:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT: vle16.v v8, (a0)
; LMULMAX2-NEXT: vle16.v v10, (a1)
; LMULMAX2-NEXT: vmaxu.vv v8, v8, v10
; LMULMAX2-NEXT: vse16.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: umax_v16i16:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT: vmaxu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vmaxu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: umax_v16i16:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT: vmaxu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vmaxu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <16 x i16>, ptr %x
%b = load <16 x i16>, ptr %y
%cc = icmp ugt <16 x i16> %a, %b
%c = select <16 x i1> %cc, <16 x i16> %a, <16 x i16> %b
store <16 x i16> %c, ptr %x
ret void
}

define void @umax_v8i32(ptr %x, ptr %y) {
; LMULMAX2-LABEL: umax_v8i32:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT: vle32.v v8, (a0)
; LMULMAX2-NEXT: vle32.v v10, (a1)
; LMULMAX2-NEXT: vmaxu.vv v8, v8, v10
; LMULMAX2-NEXT: vse32.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: umax_v8i32:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT: vmaxu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vmaxu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: umax_v8i32:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT: vmaxu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vmaxu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <8 x i32>, ptr %x
%b = load <8 x i32>, ptr %y
%cc = icmp ugt <8 x i32> %a, %b
%c = select <8 x i1> %cc, <8 x i32> %a, <8 x i32> %b
store <8 x i32> %c, ptr %x
ret void
}

define void @umax_v4i64(ptr %x, ptr %y) {
; LMULMAX2-LABEL: umax_v4i64:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT: vle64.v v8, (a0)
; LMULMAX2-NEXT: vle64.v v10, (a1)
; LMULMAX2-NEXT: vmaxu.vv v8, v8, v10
; LMULMAX2-NEXT: vse64.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: umax_v4i64:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT: vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle64.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle64.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle64.v v11, (a1)
; LMULMAX1-RV32-NEXT: vmaxu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vmaxu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse64.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: umax_v4i64:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle64.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle64.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle64.v v11, (a1)
; LMULMAX1-RV64-NEXT: vmaxu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vmaxu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
%a = load <4 x i64>, ptr %x
%b = load <4 x i64>, ptr %y
%cc = icmp ugt <4 x i64> %a, %b
%c = select <4 x i1> %cc, <4 x i64> %a, <4 x i64> %b
store <4 x i64> %c, ptr %x
ret void
}

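; vadd.vi carries a 5-bit signed immediate (-16..15), so adding a splat of
; -1 or 1 is expected to fold to a single vadd.vi at every element width.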
define void @add_vi_v16i8(ptr %x) {
; CHECK-LABEL: add_vi_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vadd.vi v8, v8, -1
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = insertelement <16 x i8> poison, i8 -1, i32 0
%c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
%d = add <16 x i8> %a, %c
store <16 x i8> %d, ptr %x
ret void
}

define void @add_vi_v8i16(ptr %x) {
; CHECK-LABEL: add_vi_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vadd.vi v8, v8, -1
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = insertelement <8 x i16> poison, i16 -1, i32 0
%c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
%d = add <8 x i16> %a, %c
store <8 x i16> %d, ptr %x
ret void
}

define void @add_vi_v4i32(ptr %x) {
; CHECK-LABEL: add_vi_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vadd.vi v8, v8, -1
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = insertelement <4 x i32> poison, i32 -1, i32 0
%c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
%d = add <4 x i32> %a, %c
store <4 x i32> %d, ptr %x
ret void
}

define void @add_vi_v2i64(ptr %x) {
; CHECK-LABEL: add_vi_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vadd.vi v8, v8, -1
; CHECK-NEXT: vse64.v v8, (a0)
; CHECK-NEXT: ret
%a = load <2 x i64>, ptr %x
%b = insertelement <2 x i64> poison, i64 -1, i32 0
%c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
%d = add <2 x i64> %a, %c
store <2 x i64> %d, ptr %x
ret void
}

define void @add_iv_v16i8(ptr %x) {
; CHECK-LABEL: add_iv_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vadd.vi v8, v8, 1
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = insertelement <16 x i8> poison, i8 1, i32 0
%c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
%d = add <16 x i8> %c, %a
store <16 x i8> %d, ptr %x
ret void
}

define void @add_iv_v8i16(ptr %x) {
; CHECK-LABEL: add_iv_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vadd.vi v8, v8, 1
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = insertelement <8 x i16> poison, i16 1, i32 0
%c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
%d = add <8 x i16> %c, %a
store <8 x i16> %d, ptr %x
ret void
}

define void @add_iv_v4i32(ptr %x) {
; CHECK-LABEL: add_iv_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vadd.vi v8, v8, 1
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = insertelement <4 x i32> poison, i32 1, i32 0
%c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
%d = add <4 x i32> %c, %a
store <4 x i32> %d, ptr %x
ret void
}

define void @add_iv_v2i64(ptr %x) {
; CHECK-LABEL: add_iv_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vadd.vi v8, v8, 1
; CHECK-NEXT: vse64.v v8, (a0)
; CHECK-NEXT: ret
%a = load <2 x i64>, ptr %x
%b = insertelement <2 x i64> poison, i64 1, i32 0
%c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
%d = add <2 x i64> %c, %a
store <2 x i64> %d, ptr %x
ret void
}

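; Splats of a scalar register should select the vadd.vx form, and the
; commuted variants (add_xv_*) are expected to canonicalize to the same
; instruction as add_vx_*.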
define void @add_vx_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: add_vx_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = insertelement <16 x i8> poison, i8 %y, i32 0
%c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
%d = add <16 x i8> %a, %c
store <16 x i8> %d, ptr %x
ret void
}

define void @add_vx_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: add_vx_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = insertelement <8 x i16> poison, i16 %y, i32 0
%c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
%d = add <8 x i16> %a, %c
store <8 x i16> %d, ptr %x
ret void
}

define void @add_vx_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: add_vx_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = insertelement <4 x i32> poison, i32 %y, i32 0
%c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
%d = add <4 x i32> %a, %c
store <4 x i32> %d, ptr %x
ret void
}

define void @add_xv_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: add_xv_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = insertelement <16 x i8> poison, i8 %y, i32 0
%c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
%d = add <16 x i8> %c, %a
store <16 x i8> %d, ptr %x
ret void
}

define void @add_xv_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: add_xv_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = insertelement <8 x i16> poison, i16 %y, i32 0
%c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
%d = add <8 x i16> %c, %a
store <8 x i16> %d, ptr %x
ret void
}

define void @add_xv_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: add_xv_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = insertelement <4 x i32> poison, i32 %y, i32 0
%c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
%d = add <4 x i32> %c, %a
store <4 x i32> %d, ptr %x
ret void
}

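; There is no vsub.vi encoding in RVV, so v - splat(imm) materializes the
; immediate in a scalar register for vsub.vx, while the reversed
; splat(imm) - v form can use vrsub.vi directly (see the sub_iv_* tests).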

define void @sub_vi_v16i8(ptr %x) {
; CHECK-LABEL: sub_vi_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: li a1, -1
; CHECK-NEXT: vsub.vx v8, v8, a1
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = insertelement <16 x i8> poison, i8 -1, i32 0
%c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
%d = sub <16 x i8> %a, %c
store <16 x i8> %d, ptr %x
ret void
}

define void @sub_vi_v8i16(ptr %x) {
; CHECK-LABEL: sub_vi_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: li a1, -1
; CHECK-NEXT: vsub.vx v8, v8, a1
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = insertelement <8 x i16> poison, i16 -1, i32 0
%c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
%d = sub <8 x i16> %a, %c
store <8 x i16> %d, ptr %x
ret void
}

define void @sub_vi_v4i32(ptr %x) {
; CHECK-LABEL: sub_vi_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: li a1, -1
; CHECK-NEXT: vsub.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = insertelement <4 x i32> poison, i32 -1, i32 0
%c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
%d = sub <4 x i32> %a, %c
store <4 x i32> %d, ptr %x
ret void
}

define void @sub_vi_v2i64(ptr %x) {
; CHECK-LABEL: sub_vi_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: li a1, -1
; CHECK-NEXT: vsub.vx v8, v8, a1
; CHECK-NEXT: vse64.v v8, (a0)
; CHECK-NEXT: ret
%a = load <2 x i64>, ptr %x
%b = insertelement <2 x i64> poison, i64 -1, i32 0
%c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
%d = sub <2 x i64> %a, %c
store <2 x i64> %d, ptr %x
ret void
}
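
; The sub_iv_* tests compute splat(1) - x. A constant-minus-vector pattern
; maps onto the reverse-subtract immediate form, vrsub.vi, which computes
; d[i] = imm - s[i] and keeps the constant out of a scalar register.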

define void @sub_iv_v16i8(ptr %x) {
; CHECK-LABEL: sub_iv_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vrsub.vi v8, v8, 1
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = insertelement <16 x i8> poison, i8 1, i32 0
%c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
%d = sub <16 x i8> %c, %a
store <16 x i8> %d, ptr %x
ret void
}

define void @sub_iv_v8i16(ptr %x) {
; CHECK-LABEL: sub_iv_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vrsub.vi v8, v8, 1
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = insertelement <8 x i16> poison, i16 1, i32 0
%c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
%d = sub <8 x i16> %c, %a
store <8 x i16> %d, ptr %x
ret void
}

define void @sub_iv_v4i32(ptr %x) {
; CHECK-LABEL: sub_iv_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vrsub.vi v8, v8, 1
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = insertelement <4 x i32> poison, i32 1, i32 0
%c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
%d = sub <4 x i32> %c, %a
store <4 x i32> %d, ptr %x
ret void
}

define void @sub_iv_v2i64(ptr %x) {
; CHECK-LABEL: sub_iv_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vrsub.vi v8, v8, 1
; CHECK-NEXT: vse64.v v8, (a0)
; CHECK-NEXT: ret
%a = load <2 x i64>, ptr %x
%b = insertelement <2 x i64> poison, i64 1, i32 0
%c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
%d = sub <2 x i64> %c, %a
store <2 x i64> %d, ptr %x
ret void
}

define void @sub_vx_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: sub_vx_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vsub.vx v8, v8, a1
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = insertelement <16 x i8> poison, i8 %y, i32 0
%c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
%d = sub <16 x i8> %a, %c
store <16 x i8> %d, ptr %x
ret void
}

define void @sub_vx_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: sub_vx_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vsub.vx v8, v8, a1
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = insertelement <8 x i16> poison, i16 %y, i32 0
%c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
%d = sub <8 x i16> %a, %c
store <8 x i16> %d, ptr %x
ret void
}

define void @sub_vx_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: sub_vx_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsub.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = insertelement <4 x i32> poison, i32 %y, i32 0
%c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
%d = sub <4 x i32> %a, %c
store <4 x i32> %d, ptr %x
ret void
}

define void @sub_xv_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: sub_xv_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vrsub.vx v8, v8, a1
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = insertelement <16 x i8> poison, i8 %y, i32 0
%c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
%d = sub <16 x i8> %c, %a
store <16 x i8> %d, ptr %x
ret void
}

define void @sub_xv_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: sub_xv_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vrsub.vx v8, v8, a1
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = insertelement <8 x i16> poison, i16 %y, i32 0
%c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
%d = sub <8 x i16> %c, %a
store <8 x i16> %d, ptr %x
ret void
}

define void @sub_xv_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: sub_xv_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vrsub.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = insertelement <4 x i32> poison, i32 %y, i32 0
%c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
%d = sub <4 x i32> %c, %a
store <4 x i32> %d, ptr %x
ret void
}
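
; mul is commutative, so both operand orders below should select vmul.vx.
; Note that RVV has no multiply-with-immediate encoding, so even small
; constant multiplicands would go through a scalar register.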

define void @mul_vx_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: mul_vx_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vmul.vx v8, v8, a1
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = insertelement <16 x i8> poison, i8 %y, i32 0
%c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
%d = mul <16 x i8> %a, %c
store <16 x i8> %d, ptr %x
ret void
}

define void @mul_vx_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: mul_vx_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vmul.vx v8, v8, a1
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = insertelement <8 x i16> poison, i16 %y, i32 0
%c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
%d = mul <8 x i16> %a, %c
store <8 x i16> %d, ptr %x
ret void
}

define void @mul_vx_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: mul_vx_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vmul.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = insertelement <4 x i32> poison, i32 %y, i32 0
%c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
%d = mul <4 x i32> %a, %c
store <4 x i32> %d, ptr %x
ret void
}

define void @mul_xv_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: mul_xv_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vmul.vx v8, v8, a1
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = insertelement <16 x i8> poison, i8 %y, i32 0
%c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
%d = mul <16 x i8> %c, %a
store <16 x i8> %d, ptr %x
ret void
}

define void @mul_xv_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: mul_xv_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vmul.vx v8, v8, a1
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = insertelement <8 x i16> poison, i16 %y, i32 0
%c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
%d = mul <8 x i16> %c, %a
store <8 x i16> %d, ptr %x
ret void
}

define void @mul_xv_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: mul_xv_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vmul.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = insertelement <4 x i32> poison, i32 %y, i32 0
%c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
%d = mul <4 x i32> %c, %a
store <4 x i32> %d, ptr %x
ret void
}
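
; The vector-immediate logic ops (vand.vi/vor.vi/vxor.vi) encode a 5-bit
; signed immediate (simm5, -16..15), so the -2 and 1 splats in the tests
; below fold directly into the instruction.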

define void @and_vi_v16i8(ptr %x) {
; CHECK-LABEL: and_vi_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vand.vi v8, v8, -2
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = insertelement <16 x i8> poison, i8 -2, i32 0
%c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
%d = and <16 x i8> %a, %c
store <16 x i8> %d, ptr %x
ret void
}

define void @and_vi_v8i16(ptr %x) {
; CHECK-LABEL: and_vi_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vand.vi v8, v8, -2
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = insertelement <8 x i16> poison, i16 -2, i32 0
%c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
%d = and <8 x i16> %a, %c
store <8 x i16> %d, ptr %x
ret void
}

define void @and_vi_v4i32(ptr %x) {
; CHECK-LABEL: and_vi_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vand.vi v8, v8, -2
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = insertelement <4 x i32> poison, i32 -2, i32 0
%c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
%d = and <4 x i32> %a, %c
store <4 x i32> %d, ptr %x
ret void
}

define void @and_vi_v2i64(ptr %x) {
; CHECK-LABEL: and_vi_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vand.vi v8, v8, -2
; CHECK-NEXT: vse64.v v8, (a0)
; CHECK-NEXT: ret
%a = load <2 x i64>, ptr %x
%b = insertelement <2 x i64> poison, i64 -2, i32 0
%c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
%d = and <2 x i64> %a, %c
store <2 x i64> %d, ptr %x
ret void
}

define void @and_iv_v16i8(ptr %x) {
; CHECK-LABEL: and_iv_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = insertelement <16 x i8> poison, i8 1, i32 0
%c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
%d = and <16 x i8> %c, %a
store <16 x i8> %d, ptr %x
ret void
}

define void @and_iv_v8i16(ptr %x) {
; CHECK-LABEL: and_iv_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = insertelement <8 x i16> poison, i16 1, i32 0
%c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
%d = and <8 x i16> %c, %a
store <8 x i16> %d, ptr %x
ret void
}

define void @and_iv_v4i32(ptr %x) {
; CHECK-LABEL: and_iv_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = insertelement <4 x i32> poison, i32 1, i32 0
%c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
%d = and <4 x i32> %c, %a
store <4 x i32> %d, ptr %x
ret void
}

define void @and_iv_v2i64(ptr %x) {
; CHECK-LABEL: and_iv_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vse64.v v8, (a0)
; CHECK-NEXT: ret
%a = load <2 x i64>, ptr %x
%b = insertelement <2 x i64> poison, i64 1, i32 0
%c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
%d = and <2 x i64> %c, %a
store <2 x i64> %d, ptr %x
ret void
}

define void @and_vx_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: and_vx_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = insertelement <16 x i8> poison, i8 %y, i32 0
%c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
%d = and <16 x i8> %a, %c
store <16 x i8> %d, ptr %x
ret void
}

define void @and_vx_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: and_vx_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = insertelement <8 x i16> poison, i16 %y, i32 0
%c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
%d = and <8 x i16> %a, %c
store <8 x i16> %d, ptr %x
ret void
}

define void @and_vx_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: and_vx_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = insertelement <4 x i32> poison, i32 %y, i32 0
%c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
%d = and <4 x i32> %a, %c
store <4 x i32> %d, ptr %x
ret void
}

define void @and_xv_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: and_xv_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = insertelement <16 x i8> poison, i8 %y, i32 0
%c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
%d = and <16 x i8> %c, %a
store <16 x i8> %d, ptr %x
ret void
}

define void @and_xv_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: and_xv_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = insertelement <8 x i16> poison, i16 %y, i32 0
%c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
%d = and <8 x i16> %c, %a
store <8 x i16> %d, ptr %x
ret void
}

define void @and_xv_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: and_xv_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = insertelement <4 x i32> poison, i32 %y, i32 0
%c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
%d = and <4 x i32> %c, %a
store <4 x i32> %d, ptr %x
ret void
}

define void @or_vi_v16i8(ptr %x) {
; CHECK-LABEL: or_vi_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vor.vi v8, v8, -2
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = insertelement <16 x i8> poison, i8 -2, i32 0
%c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
%d = or <16 x i8> %a, %c
store <16 x i8> %d, ptr %x
ret void
}

define void @or_vi_v8i16(ptr %x) {
; CHECK-LABEL: or_vi_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vor.vi v8, v8, -2
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = insertelement <8 x i16> poison, i16 -2, i32 0
%c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
%d = or <8 x i16> %a, %c
store <8 x i16> %d, ptr %x
ret void
}

define void @or_vi_v4i32(ptr %x) {
; CHECK-LABEL: or_vi_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vor.vi v8, v8, -2
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = insertelement <4 x i32> poison, i32 -2, i32 0
%c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
%d = or <4 x i32> %a, %c
store <4 x i32> %d, ptr %x
ret void
}

define void @or_vi_v2i64(ptr %x) {
; CHECK-LABEL: or_vi_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vor.vi v8, v8, -2
; CHECK-NEXT: vse64.v v8, (a0)
; CHECK-NEXT: ret
%a = load <2 x i64>, ptr %x
%b = insertelement <2 x i64> poison, i64 -2, i32 0
%c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
%d = or <2 x i64> %a, %c
store <2 x i64> %d, ptr %x
ret void
}

define void @or_iv_v16i8(ptr %x) {
; CHECK-LABEL: or_iv_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vor.vi v8, v8, 1
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = insertelement <16 x i8> poison, i8 1, i32 0
%c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
%d = or <16 x i8> %c, %a
store <16 x i8> %d, ptr %x
ret void
}

define void @or_iv_v8i16(ptr %x) {
; CHECK-LABEL: or_iv_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vor.vi v8, v8, 1
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = insertelement <8 x i16> poison, i16 1, i32 0
%c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
%d = or <8 x i16> %c, %a
store <8 x i16> %d, ptr %x
ret void
}

define void @or_iv_v4i32(ptr %x) {
; CHECK-LABEL: or_iv_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vor.vi v8, v8, 1
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = insertelement <4 x i32> poison, i32 1, i32 0
%c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
%d = or <4 x i32> %c, %a
store <4 x i32> %d, ptr %x
ret void
}

define void @or_iv_v2i64(ptr %x) {
; CHECK-LABEL: or_iv_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vor.vi v8, v8, 1
; CHECK-NEXT: vse64.v v8, (a0)
; CHECK-NEXT: ret
%a = load <2 x i64>, ptr %x
%b = insertelement <2 x i64> poison, i64 1, i32 0
%c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
%d = or <2 x i64> %c, %a
store <2 x i64> %d, ptr %x
ret void
}

define void @or_vx_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: or_vx_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vor.vx v8, v8, a1
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = insertelement <16 x i8> poison, i8 %y, i32 0
%c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
%d = or <16 x i8> %a, %c
store <16 x i8> %d, ptr %x
ret void
}

define void @or_vx_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: or_vx_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vor.vx v8, v8, a1
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = insertelement <8 x i16> poison, i16 %y, i32 0
%c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
%d = or <8 x i16> %a, %c
store <8 x i16> %d, ptr %x
ret void
}

define void @or_vx_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: or_vx_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vor.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = insertelement <4 x i32> poison, i32 %y, i32 0
%c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
%d = or <4 x i32> %a, %c
store <4 x i32> %d, ptr %x
ret void
}

define void @or_xv_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: or_xv_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vor.vx v8, v8, a1
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = insertelement <16 x i8> poison, i8 %y, i32 0
%c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
%d = or <16 x i8> %c, %a
store <16 x i8> %d, ptr %x
ret void
}

define void @or_xv_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: or_xv_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vor.vx v8, v8, a1
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = insertelement <8 x i16> poison, i16 %y, i32 0
%c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
%d = or <8 x i16> %c, %a
store <8 x i16> %d, ptr %x
ret void
}

define void @or_xv_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: or_xv_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vor.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = insertelement <4 x i32> poison, i32 %y, i32 0
%c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
%d = or <4 x i32> %c, %a
store <4 x i32> %d, ptr %x
ret void
}
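
; xor with a splat of all ones is a bitwise NOT. The expected vnot.v in the
; xor_vi_* tests below is the standard assembler alias for vxor.vi vd, vs, -1.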

define void @xor_vi_v16i8(ptr %x) {
; CHECK-LABEL: xor_vi_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = insertelement <16 x i8> poison, i8 -1, i32 0
%c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
%d = xor <16 x i8> %a, %c
store <16 x i8> %d, ptr %x
ret void
}

define void @xor_vi_v8i16(ptr %x) {
; CHECK-LABEL: xor_vi_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = insertelement <8 x i16> poison, i16 -1, i32 0
%c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
%d = xor <8 x i16> %a, %c
store <8 x i16> %d, ptr %x
ret void
}

define void @xor_vi_v4i32(ptr %x) {
; CHECK-LABEL: xor_vi_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = insertelement <4 x i32> poison, i32 -1, i32 0
%c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
%d = xor <4 x i32> %a, %c
store <4 x i32> %d, ptr %x
ret void
}

define void @xor_vi_v2i64(ptr %x) {
; CHECK-LABEL: xor_vi_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: vse64.v v8, (a0)
; CHECK-NEXT: ret
%a = load <2 x i64>, ptr %x
%b = insertelement <2 x i64> poison, i64 -1, i32 0
%c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
%d = xor <2 x i64> %a, %c
store <2 x i64> %d, ptr %x
ret void
}

define void @xor_iv_v16i8(ptr %x) {
; CHECK-LABEL: xor_iv_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vxor.vi v8, v8, 1
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = insertelement <16 x i8> poison, i8 1, i32 0
%c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
%d = xor <16 x i8> %c, %a
store <16 x i8> %d, ptr %x
ret void
}

define void @xor_iv_v8i16(ptr %x) {
; CHECK-LABEL: xor_iv_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vxor.vi v8, v8, 1
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = insertelement <8 x i16> poison, i16 1, i32 0
%c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
%d = xor <8 x i16> %c, %a
store <8 x i16> %d, ptr %x
ret void
}

define void @xor_iv_v4i32(ptr %x) {
; CHECK-LABEL: xor_iv_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vxor.vi v8, v8, 1
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = insertelement <4 x i32> poison, i32 1, i32 0
%c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
%d = xor <4 x i32> %c, %a
store <4 x i32> %d, ptr %x
ret void
}

define void @xor_iv_v2i64(ptr %x) {
; CHECK-LABEL: xor_iv_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vxor.vi v8, v8, 1
; CHECK-NEXT: vse64.v v8, (a0)
; CHECK-NEXT: ret
%a = load <2 x i64>, ptr %x
%b = insertelement <2 x i64> poison, i64 1, i32 0
%c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
%d = xor <2 x i64> %c, %a
store <2 x i64> %d, ptr %x
ret void
}

define void @xor_vx_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: xor_vx_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vxor.vx v8, v8, a1
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = insertelement <16 x i8> poison, i8 %y, i32 0
%c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
%d = xor <16 x i8> %a, %c
store <16 x i8> %d, ptr %x
ret void
}

define void @xor_vx_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: xor_vx_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vxor.vx v8, v8, a1
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = insertelement <8 x i16> poison, i16 %y, i32 0
%c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
%d = xor <8 x i16> %a, %c
store <8 x i16> %d, ptr %x
ret void
}

define void @xor_vx_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: xor_vx_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vxor.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = insertelement <4 x i32> poison, i32 %y, i32 0
%c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
%d = xor <4 x i32> %a, %c
store <4 x i32> %d, ptr %x
ret void
}

define void @xor_xv_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: xor_xv_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vxor.vx v8, v8, a1
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = insertelement <16 x i8> poison, i8 %y, i32 0
%c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
%d = xor <16 x i8> %c, %a
store <16 x i8> %d, ptr %x
ret void
}

define void @xor_xv_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: xor_xv_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vxor.vx v8, v8, a1
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = insertelement <8 x i16> poison, i16 %y, i32 0
%c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
%d = xor <8 x i16> %c, %a
store <8 x i16> %d, ptr %x
ret void
}

define void @xor_xv_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: xor_xv_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vxor.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = insertelement <4 x i32> poison, i32 %y, i32 0
%c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
%d = xor <4 x i32> %c, %a
store <4 x i32> %d, ptr %x
ret void
}
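
; Shift-by-splatted-constant tests. The immediate shift forms
; (vsrl.vi/vsra.vi/vsll.vi) encode a 5-bit unsigned amount (0..31), which is
; why even the i64 shift-by-31 cases below can stay in the .vi form; larger
; amounts would need the .vx variants.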

define void @lshr_vi_v16i8(ptr %x) {
; CHECK-LABEL: lshr_vi_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vsrl.vi v8, v8, 7
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = insertelement <16 x i8> poison, i8 7, i32 0
%c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
%d = lshr <16 x i8> %a, %c
store <16 x i8> %d, ptr %x
ret void
}

define void @lshr_vi_v8i16(ptr %x) {
; CHECK-LABEL: lshr_vi_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vsrl.vi v8, v8, 15
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = insertelement <8 x i16> poison, i16 15, i32 0
%c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
%d = lshr <8 x i16> %a, %c
store <8 x i16> %d, ptr %x
ret void
}

define void @lshr_vi_v4i32(ptr %x) {
; CHECK-LABEL: lshr_vi_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsrl.vi v8, v8, 31
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = insertelement <4 x i32> poison, i32 31, i32 0
%c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
%d = lshr <4 x i32> %a, %c
store <4 x i32> %d, ptr %x
ret void
}

define void @lshr_vi_v2i64(ptr %x) {
; CHECK-LABEL: lshr_vi_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vsrl.vi v8, v8, 31
; CHECK-NEXT: vse64.v v8, (a0)
; CHECK-NEXT: ret
%a = load <2 x i64>, ptr %x
%b = insertelement <2 x i64> poison, i64 31, i32 0
%c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
%d = lshr <2 x i64> %a, %c
store <2 x i64> %d, ptr %x
ret void
}

define void @lshr_vx_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: lshr_vx_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vsrl.vx v8, v8, a1
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = insertelement <16 x i8> poison, i8 %y, i32 0
%c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
%d = lshr <16 x i8> %a, %c
store <16 x i8> %d, ptr %x
ret void
}

define void @lshr_vx_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: lshr_vx_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vsrl.vx v8, v8, a1
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = insertelement <8 x i16> poison, i16 %y, i32 0
%c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
%d = lshr <8 x i16> %a, %c
store <8 x i16> %d, ptr %x
ret void
}

define void @lshr_vx_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: lshr_vx_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsrl.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = insertelement <4 x i32> poison, i32 %y, i32 0
%c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
%d = lshr <4 x i32> %a, %c
store <4 x i32> %d, ptr %x
ret void
}

define void @ashr_vi_v16i8(ptr %x) {
; CHECK-LABEL: ashr_vi_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vsra.vi v8, v8, 7
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = insertelement <16 x i8> poison, i8 7, i32 0
%c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
%d = ashr <16 x i8> %a, %c
store <16 x i8> %d, ptr %x
ret void
}

define void @ashr_vi_v8i16(ptr %x) {
; CHECK-LABEL: ashr_vi_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vsra.vi v8, v8, 15
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = insertelement <8 x i16> poison, i16 15, i32 0
%c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
%d = ashr <8 x i16> %a, %c
store <8 x i16> %d, ptr %x
ret void
}

define void @ashr_vi_v4i32(ptr %x) {
; CHECK-LABEL: ashr_vi_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsra.vi v8, v8, 31
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = insertelement <4 x i32> poison, i32 31, i32 0
%c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
%d = ashr <4 x i32> %a, %c
store <4 x i32> %d, ptr %x
ret void
}

define void @ashr_vi_v2i64(ptr %x) {
; CHECK-LABEL: ashr_vi_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vsra.vi v8, v8, 31
; CHECK-NEXT: vse64.v v8, (a0)
; CHECK-NEXT: ret
%a = load <2 x i64>, ptr %x
%b = insertelement <2 x i64> poison, i64 31, i32 0
%c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
%d = ashr <2 x i64> %a, %c
store <2 x i64> %d, ptr %x
ret void
}

define void @ashr_vx_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: ashr_vx_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vsra.vx v8, v8, a1
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = insertelement <16 x i8> poison, i8 %y, i32 0
%c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
%d = ashr <16 x i8> %a, %c
store <16 x i8> %d, ptr %x
ret void
}

define void @ashr_vx_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: ashr_vx_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vsra.vx v8, v8, a1
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = insertelement <8 x i16> poison, i16 %y, i32 0
%c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
%d = ashr <8 x i16> %a, %c
store <8 x i16> %d, ptr %x
ret void
}

define void @ashr_vx_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: ashr_vx_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsra.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = insertelement <4 x i32> poison, i32 %y, i32 0
%c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
%d = ashr <4 x i32> %a, %c
store <4 x i32> %d, ptr %x
ret void
}

define void @shl_vi_v16i8(ptr %x) {
; CHECK-LABEL: shl_vi_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vsll.vi v8, v8, 7
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = insertelement <16 x i8> poison, i8 7, i32 0
%c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
%d = shl <16 x i8> %a, %c
store <16 x i8> %d, ptr %x
ret void
}

define void @shl_vi_v8i16(ptr %x) {
; CHECK-LABEL: shl_vi_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vsll.vi v8, v8, 15
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = insertelement <8 x i16> poison, i16 15, i32 0
%c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
%d = shl <8 x i16> %a, %c
store <8 x i16> %d, ptr %x
ret void
}

define void @shl_vi_v4i32(ptr %x) {
; CHECK-LABEL: shl_vi_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsll.vi v8, v8, 31
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = insertelement <4 x i32> poison, i32 31, i32 0
%c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
%d = shl <4 x i32> %a, %c
store <4 x i32> %d, ptr %x
ret void
}

define void @shl_vi_v2i64(ptr %x) {
; CHECK-LABEL: shl_vi_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vsll.vi v8, v8, 31
; CHECK-NEXT: vse64.v v8, (a0)
; CHECK-NEXT: ret
%a = load <2 x i64>, ptr %x
%b = insertelement <2 x i64> poison, i64 31, i32 0
%c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
%d = shl <2 x i64> %a, %c
store <2 x i64> %d, ptr %x
ret void
}

define void @shl_vx_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: shl_vx_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vsll.vx v8, v8, a1
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = insertelement <16 x i8> poison, i8 %y, i32 0
%c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
%d = shl <16 x i8> %a, %c
store <16 x i8> %d, ptr %x
ret void
}

define void @shl_vx_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: shl_vx_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vsll.vx v8, v8, a1
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = insertelement <8 x i16> poison, i16 %y, i32 0
%c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
%d = shl <8 x i16> %a, %c
store <8 x i16> %d, ptr %x
ret void
}

define void @shl_vx_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: shl_vx_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsll.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = insertelement <4 x i32> poison, i32 %y, i32 0
%c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
%d = shl <4 x i32> %a, %c
store <4 x i32> %d, ptr %x
ret void
}
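
; There are no divide- or remainder-immediate encodings in RVV, so division
; by a splatted scalar uses vdiv.vx/vdivu.vx/vrem.vx/vremu.vx. Division by
; *constant* splats is instead strength-reduced to multiply-high sequences;
; see the mulhu/mulhs tests further down.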

define void @sdiv_vx_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: sdiv_vx_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vdiv.vx v8, v8, a1
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = insertelement <16 x i8> poison, i8 %y, i32 0
%c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
%d = sdiv <16 x i8> %a, %c
store <16 x i8> %d, ptr %x
ret void
}

define void @sdiv_vx_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: sdiv_vx_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vdiv.vx v8, v8, a1
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = insertelement <8 x i16> poison, i16 %y, i32 0
%c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
%d = sdiv <8 x i16> %a, %c
store <8 x i16> %d, ptr %x
ret void
}

define void @sdiv_vx_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: sdiv_vx_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vdiv.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = insertelement <4 x i32> poison, i32 %y, i32 0
%c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
%d = sdiv <4 x i32> %a, %c
store <4 x i32> %d, ptr %x
ret void
}

define void @srem_vx_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: srem_vx_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vrem.vx v8, v8, a1
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = insertelement <16 x i8> poison, i8 %y, i32 0
%c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
%d = srem <16 x i8> %a, %c
store <16 x i8> %d, ptr %x
ret void
}

define void @srem_vx_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: srem_vx_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vrem.vx v8, v8, a1
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = insertelement <8 x i16> poison, i16 %y, i32 0
%c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
%d = srem <8 x i16> %a, %c
store <8 x i16> %d, ptr %x
ret void
}

define void @srem_vx_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: srem_vx_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vrem.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = insertelement <4 x i32> poison, i32 %y, i32 0
%c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
%d = srem <4 x i32> %a, %c
store <4 x i32> %d, ptr %x
ret void
}

define void @udiv_vx_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: udiv_vx_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vdivu.vx v8, v8, a1
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = insertelement <16 x i8> poison, i8 %y, i32 0
%c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
%d = udiv <16 x i8> %a, %c
store <16 x i8> %d, ptr %x
ret void
}

define void @udiv_vx_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: udiv_vx_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vdivu.vx v8, v8, a1
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = insertelement <8 x i16> poison, i16 %y, i32 0
%c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
%d = udiv <8 x i16> %a, %c
store <8 x i16> %d, ptr %x
ret void
}

define void @udiv_vx_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: udiv_vx_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vdivu.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = insertelement <4 x i32> poison, i32 %y, i32 0
%c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
%d = udiv <4 x i32> %a, %c
store <4 x i32> %d, ptr %x
ret void
}

define void @urem_vx_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: urem_vx_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vremu.vx v8, v8, a1
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = insertelement <16 x i8> poison, i8 %y, i32 0
%c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
%d = urem <16 x i8> %a, %c
store <16 x i8> %d, ptr %x
ret void
}

define void @urem_vx_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: urem_vx_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vremu.vx v8, v8, a1
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = insertelement <8 x i16> poison, i16 %y, i32 0
%c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
%d = urem <8 x i16> %a, %c
store <8 x i16> %d, ptr %x
ret void
}

define void @urem_vx_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: urem_vx_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vremu.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = insertelement <4 x i32> poison, i32 %y, i32 0
%c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
%d = urem <4 x i32> %a, %c
store <4 x i32> %d, ptr %x
ret void
}
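
; The mulhu_vx_* tests check that unsigned division by a constant splat is
; strength-reduced to a multiply-high sequence (Granlund-Montgomery style).
; For the v16i8 udiv-by-9 case below, 57 = ceil(2^9 / 9), and
;   x / 9 == (x * 57) >> 9 == vsrl(vmulhu(x, 57), 1)
; holds for all 8-bit unsigned x, since vmulhu already provides the >> 8.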

define void @mulhu_vx_v16i8(ptr %x) {
; CHECK-LABEL: mulhu_vx_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: li a1, 57
; CHECK-NEXT: vmulhu.vx v8, v8, a1
; CHECK-NEXT: vsrl.vi v8, v8, 1
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = udiv <16 x i8> %a, <i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9>
store <16 x i8> %b, ptr %x
ret void
}
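
; For udiv by 7 the 16-bit magic constant does not round correctly on its
; own, so the expected code uses the add/sub fixup sequence: with
; m = 9363 (lui 2 + 1171) and q = vmulhu(x, m),
;   x / 7 == (((x - q) >> 1) + q) >> 2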

define void @mulhu_vx_v8i16(ptr %x) {
; CHECK-LABEL: mulhu_vx_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: lui a1, 2
; CHECK-NEXT: addi a1, a1, 1171
; CHECK-NEXT: vmulhu.vx v9, v8, a1
; CHECK-NEXT: vsub.vv v8, v8, v9
; CHECK-NEXT: vsrl.vi v8, v8, 1
; CHECK-NEXT: vadd.vv v8, v8, v9
; CHECK-NEXT: vsrl.vi v8, v8, 2
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = udiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
store <8 x i16> %b, ptr %x
ret void
}

define void @mulhu_vx_v4i32(ptr %x) {
; CHECK-LABEL: mulhu_vx_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: lui a1, 838861
; CHECK-NEXT: addi a1, a1, -819
; CHECK-NEXT: vmulhu.vx v8, v8, a1
; CHECK-NEXT: vsrl.vi v8, v8, 2
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = udiv <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
store <4 x i32> %b, ptr %x
ret void
}
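
; In the v2i64 case below, RV32 cannot build a 64-bit scalar in a GPR: the
; magic constant 0xAAAAAAAAAAAAAAAB is assembled with two sw stores on the
; stack and splatted with a zero-strided vlse64.v, while RV64 synthesizes
; the same constant inline and uses vmulhu.vx.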

define void @mulhu_vx_v2i64(ptr %x) {
; RV32-LABEL: mulhu_vx_v2i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vle64.v v8, (a0)
; RV32-NEXT: lui a1, 699051
; RV32-NEXT: addi a2, a1, -1366
; RV32-NEXT: sw a2, 12(sp)
; RV32-NEXT: addi a1, a1, -1365
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: addi a1, sp, 8
; RV32-NEXT: vlse64.v v9, (a1), zero
; RV32-NEXT: vmulhu.vv v8, v8, v9
; RV32-NEXT: vsrl.vi v8, v8, 1
; RV32-NEXT: vse64.v v8, (a0)
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: mulhu_vx_v2i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
; RV64-NEXT: lui a1, 699051
; RV64-NEXT: addiw a1, a1, -1365
; RV64-NEXT: slli a2, a1, 32
; RV64-NEXT: add a1, a1, a2
; RV64-NEXT: vmulhu.vx v8, v8, a1
; RV64-NEXT: vsrl.vi v8, v8, 1
; RV64-NEXT: vse64.v v8, (a0)
; RV64-NEXT: ret
%a = load <2 x i64>, ptr %x
%b = udiv <2 x i64> %a, <i64 3, i64 3>
store <2 x i64> %b, ptr %x
ret void
}
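
; The signed variants use vmulh plus an arithmetic shift; the extra vsrl of
; the sign bit that is added back in implements the round-toward-zero
; semantics of sdiv for negative dividends.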

define void @mulhs_vx_v16i8(ptr %x) {
; CHECK-LABEL: mulhs_vx_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: li a1, -123
; CHECK-NEXT: vmulhu.vx v8, v8, a1
; CHECK-NEXT: vsrl.vi v8, v8, 7
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %x
%b = udiv <16 x i8> %a, <i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9>
store <16 x i8> %b, ptr %x
ret void
}

define void @mulhs_vx_v8i16(ptr %x) {
; CHECK-LABEL: mulhs_vx_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: lui a1, 5
; CHECK-NEXT: addi a1, a1, -1755
; CHECK-NEXT: vmulh.vx v8, v8, a1
; CHECK-NEXT: vsra.vi v8, v8, 1
; CHECK-NEXT: vsrl.vi v9, v8, 15
; CHECK-NEXT: vadd.vv v8, v8, v9
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
%b = sdiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
store <8 x i16> %b, ptr %x
ret void
}

define void @mulhs_vx_v4i32(ptr %x) {
; RV32-LABEL: mulhs_vx_v4i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vle32.v v8, (a0)
; RV32-NEXT: lui a1, 629146
; RV32-NEXT: addi a1, a1, -1639
; RV32-NEXT: vmulh.vx v8, v8, a1
; RV32-NEXT: vsrl.vi v9, v8, 31
; RV32-NEXT: vsra.vi v8, v8, 1
; RV32-NEXT: vadd.vv v8, v8, v9
; RV32-NEXT: vse32.v v8, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: mulhs_vx_v4i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vle32.v v8, (a0)
; RV64-NEXT: lui a1, 629146
; RV64-NEXT: addi a1, a1, -1639
; RV64-NEXT: vmulh.vx v8, v8, a1
; RV64-NEXT: vsra.vi v8, v8, 1
; RV64-NEXT: vsrl.vi v9, v8, 31
; RV64-NEXT: vadd.vv v8, v8, v9
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
%a = load <4 x i32>, ptr %x
%b = sdiv <4 x i32> %a, <i32 -5, i32 -5, i32 -5, i32 -5>
store <4 x i32> %b, ptr %x
ret void
}
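
; For the i64 signed case, RV64 is expected to load the 64-bit magic
; constant for division by 3 from the constant pool (.LCPI321_0) rather
; than synthesizing it inline; RV32 again splats it via the stack and a
; zero-strided vlse64.v.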

define void @mulhs_vx_v2i64(ptr %x) {
; RV32-LABEL: mulhs_vx_v2i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vle64.v v8, (a0)
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a2, a1, 1365
; RV32-NEXT: sw a2, 12(sp)
; RV32-NEXT: addi a1, a1, 1366
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: addi a1, sp, 8
; RV32-NEXT: vlse64.v v9, (a1), zero
; RV32-NEXT: vmulh.vv v8, v8, v9
; RV32-NEXT: li a1, 63
; RV32-NEXT: vsrl.vx v9, v8, a1
; RV32-NEXT: vadd.vv v8, v8, v9
; RV32-NEXT: vse64.v v8, (a0)
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: mulhs_vx_v2i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
; RV64-NEXT: lui a1, %hi(.LCPI321_0)
; RV64-NEXT: ld a1, %lo(.LCPI321_0)(a1)
; RV64-NEXT: vmulh.vx v8, v8, a1
; RV64-NEXT: li a1, 63
; RV64-NEXT: vsrl.vx v9, v8, a1
; RV64-NEXT: vadd.vv v8, v8, v9
; RV64-NEXT: vse64.v v8, (a0)
; RV64-NEXT: ret
%a = load <2 x i64>, ptr %x
%b = sdiv <2 x i64> %a, <i64 3, i64 3>
store <2 x i64> %b, ptr %x
ret void
}