1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32
3 ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64
5 ; Negative test to ensure we don't try to generate a vector reduce when
6 ; vector instructions are not available.
; Hand-written add reduction over the four i32 elements of %v.
; The visible checks expect only scalar code on both targets: four loads
; through a0 followed by three adds. The values loaded from the two lowest
; offsets are added together, the values from the two highest offsets are
; added together, and the two partial sums are combined last.
8 define i32 @reduce_sum_4xi32(<4 x i32> %v) {
9 ; RV32-LABEL: reduce_sum_4xi32:
11 ; RV32-NEXT: lw a1, 12(a0)
12 ; RV32-NEXT: lw a2, 4(a0)
13 ; RV32-NEXT: lw a3, 0(a0)
14 ; RV32-NEXT: lw a0, 8(a0)
15 ; RV32-NEXT: add a2, a3, a2
16 ; RV32-NEXT: add a0, a0, a1
17 ; RV32-NEXT: add a0, a2, a0
; For the riscv64 run the expected loads are still lw, but the offsets are
; spaced 8 bytes apart (0, 8, 16, 24) and the last add is expected to be addw.
20 ; RV64-LABEL: reduce_sum_4xi32:
22 ; RV64-NEXT: lw a1, 24(a0)
23 ; RV64-NEXT: lw a2, 8(a0)
24 ; RV64-NEXT: lw a3, 0(a0)
25 ; RV64-NEXT: lw a0, 16(a0)
26 ; RV64-NEXT: add a2, a3, a2
27 ; RV64-NEXT: add a0, a0, a1
28 ; RV64-NEXT: addw a0, a2, a0
; Pull out elements 0..3 of %v, then fold them strictly left to right:
; ((e0 + e1) + e2) + e3.
30 %e0 = extractelement <4 x i32> %v, i32 0
31 %e1 = extractelement <4 x i32> %v, i32 1
32 %e2 = extractelement <4 x i32> %v, i32 2
33 %e3 = extractelement <4 x i32> %v, i32 3
34 %add0 = add i32 %e0, %e1
35 %add1 = add i32 %add0, %e2
36 %add2 = add i32 %add1, %e3
; Hand-written xor reduction over the four i32 elements of %v.
; The visible checks expect only scalar code on both targets: four loads
; through a0 followed by three xors. The values loaded from the two lowest
; offsets are xor-ed together, the values from the two highest offsets are
; xor-ed together, and the two partial results are combined last.
40 define i32 @reduce_xor_4xi32(<4 x i32> %v) {
41 ; RV32-LABEL: reduce_xor_4xi32:
43 ; RV32-NEXT: lw a1, 12(a0)
44 ; RV32-NEXT: lw a2, 4(a0)
45 ; RV32-NEXT: lw a3, 0(a0)
46 ; RV32-NEXT: lw a0, 8(a0)
47 ; RV32-NEXT: xor a2, a3, a2
48 ; RV32-NEXT: xor a0, a0, a1
49 ; RV32-NEXT: xor a0, a2, a0
; For the riscv64 run the expected loads are ld with offsets spaced 8 bytes
; apart (0, 8, 16, 24); the three xor instructions are the same as above.
52 ; RV64-LABEL: reduce_xor_4xi32:
54 ; RV64-NEXT: ld a1, 24(a0)
55 ; RV64-NEXT: ld a2, 8(a0)
56 ; RV64-NEXT: ld a3, 0(a0)
57 ; RV64-NEXT: ld a0, 16(a0)
58 ; RV64-NEXT: xor a2, a3, a2
59 ; RV64-NEXT: xor a0, a0, a1
60 ; RV64-NEXT: xor a0, a2, a0
; Pull out elements 0..3 of %v, then fold them strictly left to right:
; ((e0 ^ e1) ^ e2) ^ e3.
62 %e0 = extractelement <4 x i32> %v, i32 0
63 %e1 = extractelement <4 x i32> %v, i32 1
64 %e2 = extractelement <4 x i32> %v, i32 2
65 %e3 = extractelement <4 x i32> %v, i32 3
66 %xor0 = xor i32 %e0, %e1
67 %xor1 = xor i32 %xor0, %e2
68 %xor2 = xor i32 %xor1, %e3
; Hand-written or reduction over the four i32 elements of %v.
; The visible checks expect only scalar code on both targets: four loads
; through a0 followed by three ors. The values loaded from the two lowest
; offsets are or-ed together, the values from the two highest offsets are
; or-ed together, and the two partial results are combined last.
72 define i32 @reduce_or_4xi32(<4 x i32> %v) {
73 ; RV32-LABEL: reduce_or_4xi32:
75 ; RV32-NEXT: lw a1, 12(a0)
76 ; RV32-NEXT: lw a2, 4(a0)
77 ; RV32-NEXT: lw a3, 0(a0)
78 ; RV32-NEXT: lw a0, 8(a0)
79 ; RV32-NEXT: or a2, a3, a2
80 ; RV32-NEXT: or a0, a0, a1
81 ; RV32-NEXT: or a0, a2, a0
; For the riscv64 run the expected loads are ld with offsets spaced 8 bytes
; apart (0, 8, 16, 24); the three or instructions are the same as above.
84 ; RV64-LABEL: reduce_or_4xi32:
86 ; RV64-NEXT: ld a1, 24(a0)
87 ; RV64-NEXT: ld a2, 8(a0)
88 ; RV64-NEXT: ld a3, 0(a0)
89 ; RV64-NEXT: ld a0, 16(a0)
90 ; RV64-NEXT: or a2, a3, a2
91 ; RV64-NEXT: or a0, a0, a1
92 ; RV64-NEXT: or a0, a2, a0
; Pull out elements 0..3 of %v, then fold them strictly left to right:
; ((e0 | e1) | e2) | e3.
94 %e0 = extractelement <4 x i32> %v, i32 0
95 %e1 = extractelement <4 x i32> %v, i32 1
96 %e2 = extractelement <4 x i32> %v, i32 2
97 %e3 = extractelement <4 x i32> %v, i32 3
98 %or0 = or i32 %e0, %e1
99 %or1 = or i32 %or0, %e2
100 %or2 = or i32 %or1, %e3