1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
; AND-reduction of a <vscale x 8 x i8> vector via llvm.vector.reduce.and.
; The expected assembly performs the reduction on halfword (.h) lanes under an
; all-.h predicate (ptrue p0.h / andv h0) and moves the scalar out through w0.
6 define i8 @andv_nxv8i8(<vscale x 8 x i8> %a) {
7 ; CHECK-LABEL: andv_nxv8i8:
9 ; CHECK-NEXT: ptrue p0.h
10 ; CHECK-NEXT: andv h0, p0, z0.h
11 ; CHECK-NEXT: fmov w0, s0
13 %res = call i8 @llvm.vector.reduce.and.nxv8i8(<vscale x 8 x i8> %a)
; AND-reduction of a <vscale x 8 x i32> vector via llvm.vector.reduce.and.
; The expected assembly first combines z0 and z1 with a vector `and`, then
; issues a single andv over .s lanes and moves the result to w0.
17 define i32 @andv_nxv8i32(<vscale x 8 x i32> %a) {
18 ; CHECK-LABEL: andv_nxv8i32:
20 ; CHECK-NEXT: and z0.d, z0.d, z1.d
21 ; CHECK-NEXT: ptrue p0.s
22 ; CHECK-NEXT: andv s0, p0, z0.s
23 ; CHECK-NEXT: fmov w0, s0
25 %res = call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> %a)
; OR-reduction of a <vscale x 2 x i32> vector via llvm.vector.reduce.or.
; The expected assembly reduces on doubleword (.d) lanes (ptrue p0.d / orv d0)
; and reads the i32 result from the low 32 bits with `fmov w0, s0`.
31 define i32 @orv_nxv2i32(<vscale x 2 x i32> %a) {
32 ; CHECK-LABEL: orv_nxv2i32:
34 ; CHECK-NEXT: ptrue p0.d
35 ; CHECK-NEXT: orv d0, p0, z0.d
36 ; CHECK-NEXT: fmov w0, s0
38 %res = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %a)
; OR-reduction of a <vscale x 8 x i64> vector via llvm.vector.reduce.or.
; The expected assembly folds four input registers (z0..z3) pairwise with
; three vector `orr` instructions, then issues one orv over .d lanes and moves
; the 64-bit result to x0.
42 define i64 @orv_nxv8i64(<vscale x 8 x i64> %a) {
43 ; CHECK-LABEL: orv_nxv8i64:
45 ; CHECK-NEXT: orr z1.d, z1.d, z3.d
46 ; CHECK-NEXT: orr z0.d, z0.d, z2.d
47 ; CHECK-NEXT: ptrue p0.d
48 ; CHECK-NEXT: orr z0.d, z0.d, z1.d
49 ; CHECK-NEXT: orv d0, p0, z0.d
50 ; CHECK-NEXT: fmov x0, d0
52 %res = call i64 @llvm.vector.reduce.or.nxv8i64(<vscale x 8 x i64> %a)
; XOR-reduction of a <vscale x 2 x i16> vector via llvm.vector.reduce.xor.
; The expected assembly reduces on doubleword (.d) lanes (ptrue p0.d / eorv d0)
; and moves the result out through w0.
58 define i16 @xorv_nxv2i16(<vscale x 2 x i16> %a) {
59 ; CHECK-LABEL: xorv_nxv2i16:
61 ; CHECK-NEXT: ptrue p0.d
62 ; CHECK-NEXT: eorv d0, p0, z0.d
63 ; CHECK-NEXT: fmov w0, s0
65 %res = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %a)
; XOR-reduction of a <vscale x 8 x i32> vector via llvm.vector.reduce.xor.
; The expected assembly first combines z0 and z1 with a vector `eor`, then
; issues a single eorv over .s lanes and moves the result to w0.
69 define i32 @xorv_nxv8i32(<vscale x 8 x i32> %a) {
70 ; CHECK-LABEL: xorv_nxv8i32:
72 ; CHECK-NEXT: eor z0.d, z0.d, z1.d
73 ; CHECK-NEXT: ptrue p0.s
74 ; CHECK-NEXT: eorv s0, p0, z0.s
75 ; CHECK-NEXT: fmov w0, s0
77 %res = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> %a)
; ADD-reduction of a <vscale x 4 x i16> vector via llvm.vector.reduce.add.
; The expected assembly reduces on word (.s) lanes with uaddv (destination d0)
; and moves the result out through w0.
83 define i16 @uaddv_nxv4i16(<vscale x 4 x i16> %a) {
84 ; CHECK-LABEL: uaddv_nxv4i16:
86 ; CHECK-NEXT: ptrue p0.s
87 ; CHECK-NEXT: uaddv d0, p0, z0.s
88 ; CHECK-NEXT: fmov w0, s0
90 %res = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %a)
; ADD-reduction of a <vscale x 16 x i16> vector via llvm.vector.reduce.add.
; The expected assembly first sums z0 and z1 lane-wise (.h), then issues a
; single uaddv over .h lanes and moves the result out through w0.
94 define i16 @uaddv_nxv16i16(<vscale x 16 x i16> %a) {
95 ; CHECK-LABEL: uaddv_nxv16i16:
97 ; CHECK-NEXT: add z0.h, z0.h, z1.h
98 ; CHECK-NEXT: ptrue p0.h
99 ; CHECK-NEXT: uaddv d0, p0, z0.h
100 ; CHECK-NEXT: fmov w0, s0
102 %res = call i16 @llvm.vector.reduce.add.nxv16i16(<vscale x 16 x i16> %a)
; ADD-reduction of a <vscale x 16 x i32> vector via llvm.vector.reduce.add.
; The expected assembly folds four input registers (z0..z3) pairwise with
; three lane-wise `add` instructions (.s), then issues one uaddv over .s lanes
; and moves the result to w0.
106 define i32 @uaddv_nxv16i32(<vscale x 16 x i32> %a) {
107 ; CHECK-LABEL: uaddv_nxv16i32:
109 ; CHECK-NEXT: add z1.s, z1.s, z3.s
110 ; CHECK-NEXT: add z0.s, z0.s, z2.s
111 ; CHECK-NEXT: ptrue p0.s
112 ; CHECK-NEXT: add z0.s, z0.s, z1.s
113 ; CHECK-NEXT: uaddv d0, p0, z0.s
114 ; CHECK-NEXT: fmov w0, s0
116 %res = call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> %a)
; Unsigned-min reduction of a <vscale x 2 x i32> vector via
; llvm.vector.reduce.umin. The expected assembly masks every .d lane with
; 0xffffffff before the uminv over .d lanes, then moves the result to w0.
; NOTE(review): the mask presumably clears the upper half of each 64-bit lane
; so the unsigned compare only sees the i32 payload - confirm.
122 define i32 @umin_nxv2i32(<vscale x 2 x i32> %a) {
123 ; CHECK-LABEL: umin_nxv2i32:
125 ; CHECK-NEXT: and z0.d, z0.d, #0xffffffff
126 ; CHECK-NEXT: ptrue p0.d
127 ; CHECK-NEXT: uminv d0, p0, z0.d
128 ; CHECK-NEXT: fmov w0, s0
130 %res = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %a)
; Unsigned-min reduction of a <vscale x 4 x i64> vector via
; llvm.vector.reduce.umin. The expected assembly combines z0 and z1 with a
; predicated lane-wise umin, then issues one uminv over .d lanes and moves the
; 64-bit result to x0.
134 define i64 @umin_nxv4i64(<vscale x 4 x i64> %a) {
135 ; CHECK-LABEL: umin_nxv4i64:
137 ; CHECK-NEXT: ptrue p0.d
138 ; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d
139 ; CHECK-NEXT: uminv d0, p0, z0.d
140 ; CHECK-NEXT: fmov x0, d0
142 %res = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %a)
; Signed-min reduction of a <vscale x 4 x i8> vector via
; llvm.vector.reduce.smin. The expected assembly applies a predicated sxtb to
; the .s lanes before the sminv over .s lanes, then moves the result to w0.
; NOTE(review): the sxtb presumably sign-extends each i8 payload to the full
; 32-bit lane so the signed compare is correct - confirm.
148 define i8 @smin_nxv4i8(<vscale x 4 x i8> %a) {
149 ; CHECK-LABEL: smin_nxv4i8:
151 ; CHECK-NEXT: ptrue p0.s
152 ; CHECK-NEXT: sxtb z0.s, p0/m, z0.s
153 ; CHECK-NEXT: sminv s0, p0, z0.s
154 ; CHECK-NEXT: fmov w0, s0
156 %res = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %a)
; Signed-min reduction of a <vscale x 8 x i32> vector via
; llvm.vector.reduce.smin. The expected assembly combines z0 and z1 with a
; predicated lane-wise smin, then issues one sminv over .s lanes and moves the
; result to w0.
160 define i32 @smin_nxv8i32(<vscale x 8 x i32> %a) {
161 ; CHECK-LABEL: smin_nxv8i32:
163 ; CHECK-NEXT: ptrue p0.s
164 ; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s
165 ; CHECK-NEXT: sminv s0, p0, z0.s
166 ; CHECK-NEXT: fmov w0, s0
168 %res = call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> %a)
; Unsigned-MAX reduction of a <vscale x 16 x i16> vector: the body calls
; llvm.vector.reduce.umax and the expected assembly is a predicated lane-wise
; umax of z0 and z1 followed by umaxv over .h lanes, with the result moved out
; through w0.
; NOTE(review): the function name says "smin" but the operation under test is
; umax; consider renaming (function and its label assertion together) to
; something like umax_nxv16i16.
174 define i16 @smin_nxv16i16(<vscale x 16 x i16> %a) {
175 ; CHECK-LABEL: smin_nxv16i16:
177 ; CHECK-NEXT: ptrue p0.h
178 ; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h
179 ; CHECK-NEXT: umaxv h0, p0, z0.h
180 ; CHECK-NEXT: fmov w0, s0
182 %res = call i16 @llvm.vector.reduce.umax.nxv16i16(<vscale x 16 x i16> %a)
; Signed-MAX reduction of a <vscale x 8 x i64> vector: the body calls
; llvm.vector.reduce.smax and the expected assembly folds z0..z3 pairwise with
; three predicated lane-wise smax instructions, then issues one smaxv over .d
; lanes and moves the 64-bit result to x0.
; NOTE(review): the function name says "smin" but the operation under test is
; smax; consider renaming (function and its label assertion together) to
; something like smax_nxv8i64.
188 define i64 @smin_nxv8i64(<vscale x 8 x i64> %a) {
189 ; CHECK-LABEL: smin_nxv8i64:
191 ; CHECK-NEXT: ptrue p0.d
192 ; CHECK-NEXT: smax z1.d, p0/m, z1.d, z3.d
193 ; CHECK-NEXT: smax z0.d, p0/m, z0.d, z2.d
194 ; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d
195 ; CHECK-NEXT: smaxv d0, p0, z0.d
196 ; CHECK-NEXT: fmov x0, d0
198 %res = call i64 @llvm.vector.reduce.smax.nxv8i64(<vscale x 8 x i64> %a)
; Declarations of the llvm.vector.reduce.* intrinsics called by the test
; functions in this file, grouped by operation: and, or, xor, add, umin, smin,
; umax, smax. Each takes one scalable vector and yields one scalar of the
; vector's element type.
202 declare i8 @llvm.vector.reduce.and.nxv8i8(<vscale x 8 x i8>)
203 declare i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32>)
205 declare i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32>)
206 declare i64 @llvm.vector.reduce.or.nxv8i64(<vscale x 8 x i64>)
208 declare i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16>)
209 declare i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32>)
211 declare i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16>)
212 declare i16 @llvm.vector.reduce.add.nxv16i16(<vscale x 16 x i16>)
213 declare i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32>)
215 declare i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32>)
216 declare i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64>)
218 declare i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8>)
219 declare i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32>)
221 declare i16 @llvm.vector.reduce.umax.nxv16i16(<vscale x 16 x i16>)
223 declare i64 @llvm.vector.reduce.smax.nxv8i64(<vscale x 8 x i64>)