1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
; AND reduction of <vscale x 8 x i8> via llvm.vector.reduce.and.
; The checks below expect the reduction to run on .h lanes (ptrue p0.h,
; andv h0) with the scalar result moved to w0.
6 define i8 @andv_nxv8i8(<vscale x 8 x i8> %a) {
7 ; CHECK-LABEL: andv_nxv8i8:
9 ; CHECK-NEXT: ptrue p0.h
10 ; CHECK-NEXT: andv h0, p0, z0.h
11 ; CHECK-NEXT: fmov w0, s0
13 %res = call i8 @llvm.vector.reduce.and.nxv8i8(<vscale x 8 x i8> %a)
; AND reduction of <vscale x 8 x i32> via llvm.vector.reduce.and.
; The checks below expect z0 and z1 to be combined with a vector AND first,
; followed by a single andv over .s lanes and a move of the result to w0.
; NOTE(review): presumably the input arrives in two Z registers because the
; type is wider than one SVE register of i32 elements -- confirm.
17 define i32 @andv_nxv8i32(<vscale x 8 x i32> %a) {
18 ; CHECK-LABEL: andv_nxv8i32:
20 ; CHECK-NEXT: and z0.d, z0.d, z1.d
21 ; CHECK-NEXT: ptrue p0.s
22 ; CHECK-NEXT: andv s0, p0, z0.s
23 ; CHECK-NEXT: fmov w0, s0
25 %res = call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> %a)
; OR reduction of <vscale x 2 x i32> via llvm.vector.reduce.or.
; The checks below expect the reduction to run on .d lanes (ptrue p0.d,
; orv d0); the result is moved to x0 and the i32 value is taken from w0
; (see the expected "kill" line).
31 define i32 @orv_nxv2i32(<vscale x 2 x i32> %a) {
32 ; CHECK-LABEL: orv_nxv2i32:
34 ; CHECK-NEXT: ptrue p0.d
35 ; CHECK-NEXT: orv d0, p0, z0.d
36 ; CHECK-NEXT: fmov x0, d0
37 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
39 %res = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %a)
; OR reduction of <vscale x 8 x i64> via llvm.vector.reduce.or.
; The checks below expect four input registers (z0-z3) to be combined
; pairwise with three vector ORR instructions, followed by a single orv over
; .d lanes and a move of the result to x0.
43 define i64 @orv_nxv8i64(<vscale x 8 x i64> %a) {
44 ; CHECK-LABEL: orv_nxv8i64:
46 ; CHECK-NEXT: orr z1.d, z1.d, z3.d
47 ; CHECK-NEXT: orr z0.d, z0.d, z2.d
48 ; CHECK-NEXT: orr z0.d, z0.d, z1.d
49 ; CHECK-NEXT: ptrue p0.d
50 ; CHECK-NEXT: orv d0, p0, z0.d
51 ; CHECK-NEXT: fmov x0, d0
53 %res = call i64 @llvm.vector.reduce.or.nxv8i64(<vscale x 8 x i64> %a)
; XOR reduction of <vscale x 2 x i16> via llvm.vector.reduce.xor.
; The checks below expect the reduction to run on .d lanes (ptrue p0.d,
; eorv d0); the result is moved to x0 and the returned value is taken from
; w0 (see the expected "kill" line).
59 define i16 @xorv_nxv2i16(<vscale x 2 x i16> %a) {
60 ; CHECK-LABEL: xorv_nxv2i16:
62 ; CHECK-NEXT: ptrue p0.d
63 ; CHECK-NEXT: eorv d0, p0, z0.d
64 ; CHECK-NEXT: fmov x0, d0
65 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
67 %res = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %a)
; XOR reduction of <vscale x 8 x i32> via llvm.vector.reduce.xor.
; The checks below expect z0 and z1 to be combined with a vector EOR first,
; followed by a single eorv over .s lanes and a move of the result to w0.
71 define i32 @xorv_nxv8i32(<vscale x 8 x i32> %a) {
72 ; CHECK-LABEL: xorv_nxv8i32:
74 ; CHECK-NEXT: eor z0.d, z0.d, z1.d
75 ; CHECK-NEXT: ptrue p0.s
76 ; CHECK-NEXT: eorv s0, p0, z0.s
77 ; CHECK-NEXT: fmov w0, s0
79 %res = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> %a)
; ADD reduction of <vscale x 4 x i16> via llvm.vector.reduce.add.
; The checks below expect a uaddv over .s lanes that writes d0; the result is
; moved to x0 and the returned value is taken from w0 (see the expected
; "kill" line).
85 define i16 @uaddv_nxv4i16(<vscale x 4 x i16> %a) {
86 ; CHECK-LABEL: uaddv_nxv4i16:
88 ; CHECK-NEXT: ptrue p0.s
89 ; CHECK-NEXT: uaddv d0, p0, z0.s
90 ; CHECK-NEXT: fmov x0, d0
91 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
93 %res = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %a)
; ADD reduction of <vscale x 16 x i16> via llvm.vector.reduce.add.
; The checks below expect z0 and z1 to be summed lane-wise (.h) first,
; followed by a single uaddv over .h lanes that writes d0; the result is
; moved to x0 and the returned value is taken from w0.
97 define i16 @uaddv_nxv16i16(<vscale x 16 x i16> %a) {
98 ; CHECK-LABEL: uaddv_nxv16i16:
100 ; CHECK-NEXT: add z0.h, z0.h, z1.h
101 ; CHECK-NEXT: ptrue p0.h
102 ; CHECK-NEXT: uaddv d0, p0, z0.h
103 ; CHECK-NEXT: fmov x0, d0
104 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
106 %res = call i16 @llvm.vector.reduce.add.nxv16i16(<vscale x 16 x i16> %a)
; ADD reduction of <vscale x 16 x i32> via llvm.vector.reduce.add.
; The checks below expect four input registers (z0-z3) to be summed pairwise
; with three lane-wise (.s) vector ADDs, followed by a single uaddv over .s
; lanes that writes d0; the result is moved to x0 and the i32 value is taken
; from w0.
110 define i32 @uaddv_nxv16i32(<vscale x 16 x i32> %a) {
111 ; CHECK-LABEL: uaddv_nxv16i32:
113 ; CHECK-NEXT: add z1.s, z1.s, z3.s
114 ; CHECK-NEXT: add z0.s, z0.s, z2.s
115 ; CHECK-NEXT: add z0.s, z0.s, z1.s
116 ; CHECK-NEXT: ptrue p0.s
117 ; CHECK-NEXT: uaddv d0, p0, z0.s
118 ; CHECK-NEXT: fmov x0, d0
119 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
121 %res = call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> %a)
; Unsigned-min reduction of <vscale x 2 x i32> via llvm.vector.reduce.umin.
; The checks below expect each .d lane to be masked with #0xffffffff (clearing
; the upper 32 bits) before the uminv over .d lanes, so the unsigned compare
; only sees the low 32 bits. The result is moved to x0 and read from w0.
127 define i32 @umin_nxv2i32(<vscale x 2 x i32> %a) {
128 ; CHECK-LABEL: umin_nxv2i32:
130 ; CHECK-NEXT: ptrue p0.d
131 ; CHECK-NEXT: and z0.d, z0.d, #0xffffffff
132 ; CHECK-NEXT: uminv d0, p0, z0.d
133 ; CHECK-NEXT: fmov x0, d0
134 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
136 %res = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %a)
; Unsigned-min reduction of <vscale x 4 x i64> via llvm.vector.reduce.umin.
; The checks below expect z0 and z1 to be combined with a predicated lane-wise
; umin first, followed by a single uminv over .d lanes and a move of the
; result to x0.
140 define i64 @umin_nxv4i64(<vscale x 4 x i64> %a) {
141 ; CHECK-LABEL: umin_nxv4i64:
143 ; CHECK-NEXT: ptrue p0.d
144 ; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d
145 ; CHECK-NEXT: uminv d0, p0, z0.d
146 ; CHECK-NEXT: fmov x0, d0
148 %res = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %a)
; Signed-min reduction of <vscale x 4 x i8> via llvm.vector.reduce.smin.
; The checks below expect each .s lane to be sign-extended from its low byte
; (sxtb) before the sminv over .s lanes, so the signed compare sees the
; correct sign. The result is moved to w0.
154 define i8 @smin_nxv4i8(<vscale x 4 x i8> %a) {
155 ; CHECK-LABEL: smin_nxv4i8:
157 ; CHECK-NEXT: ptrue p0.s
158 ; CHECK-NEXT: sxtb z0.s, p0/m, z0.s
159 ; CHECK-NEXT: sminv s0, p0, z0.s
160 ; CHECK-NEXT: fmov w0, s0
162 %res = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %a)
; Signed-min reduction of <vscale x 8 x i32> via llvm.vector.reduce.smin.
; The checks below expect z0 and z1 to be combined with a predicated lane-wise
; smin first, followed by a single sminv over .s lanes and a move of the
; result to w0.
166 define i32 @smin_nxv8i32(<vscale x 8 x i32> %a) {
167 ; CHECK-LABEL: smin_nxv8i32:
169 ; CHECK-NEXT: ptrue p0.s
170 ; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s
171 ; CHECK-NEXT: sminv s0, p0, z0.s
172 ; CHECK-NEXT: fmov w0, s0
174 %res = call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> %a)
; Unsigned-max reduction of <vscale x 16 x i16> via llvm.vector.reduce.umax.
; The checks below expect z0 and z1 to be combined with a predicated lane-wise
; umax first, followed by a single umaxv over .h lanes and a move of the
; result to w0.
; NOTE(review): the function is named "smin_..." but it calls the umax
; intrinsic and the expected instructions are umax/umaxv. The name looks like
; a copy-paste leftover; renaming it (e.g. to umax_nxv16i16) would also
; require updating the label check line to match.
180 define i16 @smin_nxv16i16(<vscale x 16 x i16> %a) {
181 ; CHECK-LABEL: smin_nxv16i16:
183 ; CHECK-NEXT: ptrue p0.h
184 ; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h
185 ; CHECK-NEXT: umaxv h0, p0, z0.h
186 ; CHECK-NEXT: fmov w0, s0
188 %res = call i16 @llvm.vector.reduce.umax.nxv16i16(<vscale x 16 x i16> %a)
; Signed-max reduction of <vscale x 8 x i64> via llvm.vector.reduce.smax.
; The checks below expect four input registers (z0-z3) to be combined pairwise
; with three predicated lane-wise smax instructions, followed by a single
; smaxv over .d lanes and a move of the result to x0.
; NOTE(review): the function is named "smin_..." but it calls the smax
; intrinsic and the expected instructions are smax/smaxv. The name looks like
; a copy-paste leftover; renaming it (e.g. to smax_nxv8i64) would also
; require updating the label check line to match.
194 define i64 @smin_nxv8i64(<vscale x 8 x i64> %a) {
195 ; CHECK-LABEL: smin_nxv8i64:
197 ; CHECK-NEXT: ptrue p0.d
198 ; CHECK-NEXT: smax z1.d, p0/m, z1.d, z3.d
199 ; CHECK-NEXT: smax z0.d, p0/m, z0.d, z2.d
200 ; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d
201 ; CHECK-NEXT: smaxv d0, p0, z0.d
202 ; CHECK-NEXT: fmov x0, d0
204 %res = call i64 @llvm.vector.reduce.smax.nxv8i64(<vscale x 8 x i64> %a)
; Declarations of the llvm.vector.reduce.* intrinsics called by the test
; functions in this file, one per (operation, vector type) pair:
; and, or, xor, add, umin, smin, umax, smax.
208 declare i8 @llvm.vector.reduce.and.nxv8i8(<vscale x 8 x i8>)
209 declare i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32>)
211 declare i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32>)
212 declare i64 @llvm.vector.reduce.or.nxv8i64(<vscale x 8 x i64>)
214 declare i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16>)
215 declare i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32>)
217 declare i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16>)
218 declare i16 @llvm.vector.reduce.add.nxv16i16(<vscale x 16 x i16>)
219 declare i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32>)
221 declare i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32>)
222 declare i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64>)
224 declare i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8>)
225 declare i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32>)
227 declare i16 @llvm.vector.reduce.umax.nxv16i16(<vscale x 16 x i16>)
229 declare i64 @llvm.vector.reduce.smax.nxv8i64(<vscale x 8 x i64>)