; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -O0 -mtriple=aarch64-apple-ios -global-isel -disable-expand-reductions -stop-after=irtranslator %s -o - | FileCheck %s
; Ordered floating-point reduction intrinsics (carry an explicit start value).
declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
declare double @llvm.vector.reduce.fmul.v4f64(double, <4 x double>)
; Without fast-math flags, fadd reduction must stay ordered: G_VECREDUCE_SEQ_FADD
; with the start value as the first operand.
define float @fadd_seq(float %start, <4 x float> %vec) {
  ; CHECK-LABEL: name: fadd_seq
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q1, $s0
  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
  ; CHECK:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
  ; CHECK:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x s64>)
  ; CHECK:   [[VECREDUCE_SEQ_FADD:%[0-9]+]]:_(s32) = G_VECREDUCE_SEQ_FADD [[COPY]](s32), [[BITCAST]](<4 x s32>)
  ; CHECK:   $s0 = COPY [[VECREDUCE_SEQ_FADD]](s32)
  ; CHECK:   RET_ReallyLR implicit $s0
  %res = call float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %vec)
  ret float %res
}
; With reassoc, the fadd reduction may be translated to the unordered
; G_VECREDUCE_FADD, with the start value folded in by a separate G_FADD.
define float @fadd_fast(float %start, <4 x float> %vec) {
  ; CHECK-LABEL: name: fadd_fast
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q1, $s0
  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
  ; CHECK:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
  ; CHECK:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x s64>)
  ; CHECK:   [[VECREDUCE_FADD:%[0-9]+]]:_(s32) = reassoc G_VECREDUCE_FADD [[BITCAST]](<4 x s32>)
  ; CHECK:   [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[COPY]], [[VECREDUCE_FADD]]
  ; CHECK:   $s0 = COPY [[FADD]](s32)
  ; CHECK:   RET_ReallyLR implicit $s0
  %res = call reassoc float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %vec)
  ret float %res
}
; Ordered fmul reduction: the v4f64 argument arrives split across $q1/$q2 and is
; re-formed with G_CONCAT_VECTORS before G_VECREDUCE_SEQ_FMUL.
define double @fmul_seq(double %start, <4 x double> %vec) {
  ; CHECK-LABEL: name: fmul_seq
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $d0, $q1, $q2
  ; CHECK:   [[COPY:%[0-9]+]]:_(s64) = COPY $d0
  ; CHECK:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
  ; CHECK:   [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
  ; CHECK:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[COPY1]](<2 x s64>), [[COPY2]](<2 x s64>)
  ; CHECK:   [[VECREDUCE_SEQ_FMUL:%[0-9]+]]:_(s64) = G_VECREDUCE_SEQ_FMUL [[COPY]](s64), [[CONCAT_VECTORS]](<4 x s64>)
  ; CHECK:   $d0 = COPY [[VECREDUCE_SEQ_FMUL]](s64)
  ; CHECK:   RET_ReallyLR implicit $d0
  %res = call double @llvm.vector.reduce.fmul.v4f64(double %start, <4 x double> %vec)
  ret double %res
}
; reassoc fmul reduction: unordered G_VECREDUCE_FMUL plus a separate G_FMUL
; folding in the start value.
define double @fmul_fast(double %start, <4 x double> %vec) {
  ; CHECK-LABEL: name: fmul_fast
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $d0, $q1, $q2
  ; CHECK:   [[COPY:%[0-9]+]]:_(s64) = COPY $d0
  ; CHECK:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
  ; CHECK:   [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
  ; CHECK:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[COPY1]](<2 x s64>), [[COPY2]](<2 x s64>)
  ; CHECK:   [[VECREDUCE_FMUL:%[0-9]+]]:_(s64) = reassoc G_VECREDUCE_FMUL [[CONCAT_VECTORS]](<4 x s64>)
  ; CHECK:   [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[COPY]], [[VECREDUCE_FMUL]]
  ; CHECK:   $d0 = COPY [[FMUL]](s64)
  ; CHECK:   RET_ReallyLR implicit $d0
  %res = call reassoc double @llvm.vector.reduce.fmul.v4f64(double %start, <4 x double> %vec)
  ret double %res
}
; Floating-point min/max reduction intrinsics (no start value).
declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmaximum.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fminimum.v4f32(<4 x float>)
; fmax reduction translates to G_VECREDUCE_FMAX.
define float @fmax(<4 x float> %vec) {
  ; CHECK-LABEL: name: fmax
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
  ; CHECK:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
  ; CHECK:   [[VECREDUCE_FMAX:%[0-9]+]]:_(s32) = G_VECREDUCE_FMAX [[BITCAST]](<4 x s32>)
  ; CHECK:   $s0 = COPY [[VECREDUCE_FMAX]](s32)
  ; CHECK:   RET_ReallyLR implicit $s0
  %res = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %vec)
  ret float %res
}
; fmin reduction translates to G_VECREDUCE_FMIN.
define float @fmin(<4 x float> %vec) {
  ; CHECK-LABEL: name: fmin
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
  ; CHECK:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
  ; CHECK:   [[VECREDUCE_FMIN:%[0-9]+]]:_(s32) = G_VECREDUCE_FMIN [[BITCAST]](<4 x s32>)
  ; CHECK:   $s0 = COPY [[VECREDUCE_FMIN]](s32)
  ; CHECK:   RET_ReallyLR implicit $s0
  %res = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %vec)
  ret float %res
}
; The nnan flag on the call must be carried onto the generic reduction opcode.
define float @fmin_nnan(<4 x float> %vec) {
  ; CHECK-LABEL: name: fmin_nnan
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
  ; CHECK:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
  ; CHECK:   [[VECREDUCE_FMIN:%[0-9]+]]:_(s32) = nnan G_VECREDUCE_FMIN [[BITCAST]](<4 x s32>)
  ; CHECK:   $s0 = COPY [[VECREDUCE_FMIN]](s32)
  ; CHECK:   RET_ReallyLR implicit $s0
  %res = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> %vec)
  ret float %res
}
; fmaximum (IEEE-754 maximum semantics) gets its own opcode, G_VECREDUCE_FMAXIMUM.
define float @fmaximum(<4 x float> %vec) {
  ; CHECK-LABEL: name: fmaximum
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
  ; CHECK:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
  ; CHECK:   [[VECREDUCE_FMAX:%[0-9]+]]:_(s32) = G_VECREDUCE_FMAXIMUM [[BITCAST]](<4 x s32>)
  ; CHECK:   $s0 = COPY [[VECREDUCE_FMAX]](s32)
  ; CHECK:   RET_ReallyLR implicit $s0
  %res = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> %vec)
  ret float %res
}
; fminimum translates to G_VECREDUCE_FMINIMUM.
define float @fminimum(<4 x float> %vec) {
  ; CHECK-LABEL: name: fminimum
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
  ; CHECK:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
  ; CHECK:   [[VECREDUCE_FMIN:%[0-9]+]]:_(s32) = G_VECREDUCE_FMINIMUM [[BITCAST]](<4 x s32>)
  ; CHECK:   $s0 = COPY [[VECREDUCE_FMIN]](s32)
  ; CHECK:   RET_ReallyLR implicit $s0
  %res = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %vec)
  ret float %res
}
; nnan is preserved on G_VECREDUCE_FMINIMUM as well.
define float @fminimum_nnan(<4 x float> %vec) {
  ; CHECK-LABEL: name: fminimum_nnan
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
  ; CHECK:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
  ; CHECK:   [[VECREDUCE_FMIN:%[0-9]+]]:_(s32) = nnan G_VECREDUCE_FMINIMUM [[BITCAST]](<4 x s32>)
  ; CHECK:   $s0 = COPY [[VECREDUCE_FMIN]](s32)
  ; CHECK:   RET_ReallyLR implicit $s0
  %res = call nnan float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %vec)
  ret float %res
}
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
; Integer add reduction translates to G_VECREDUCE_ADD.
define i32 @add(<4 x i32> %vec) {
  ; CHECK-LABEL: name: add
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_ADD:%[0-9]+]]:_(s32) = G_VECREDUCE_ADD [[COPY]](<4 x s32>)
  ; CHECK:   $w0 = COPY [[VECREDUCE_ADD]](s32)
  ; CHECK:   RET_ReallyLR implicit $w0
  %res = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %vec)
  ret i32 %res
}
declare i32 @llvm.vector.reduce.mul.v4i32(<4 x i32>)
; Integer mul reduction translates to G_VECREDUCE_MUL.
define i32 @mul(<4 x i32> %vec) {
  ; CHECK-LABEL: name: mul
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_MUL:%[0-9]+]]:_(s32) = G_VECREDUCE_MUL [[COPY]](<4 x s32>)
  ; CHECK:   $w0 = COPY [[VECREDUCE_MUL]](s32)
  ; CHECK:   RET_ReallyLR implicit $w0
  %res = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %vec)
  ret i32 %res
}
declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32>)
; Bitwise and reduction translates to G_VECREDUCE_AND.
define i32 @and(<4 x i32> %vec) {
  ; CHECK-LABEL: name: and
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_AND:%[0-9]+]]:_(s32) = G_VECREDUCE_AND [[COPY]](<4 x s32>)
  ; CHECK:   $w0 = COPY [[VECREDUCE_AND]](s32)
  ; CHECK:   RET_ReallyLR implicit $w0
  %res = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %vec)
  ret i32 %res
}
declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32>)
; Bitwise or reduction translates to G_VECREDUCE_OR.
define i32 @or(<4 x i32> %vec) {
  ; CHECK-LABEL: name: or
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_OR:%[0-9]+]]:_(s32) = G_VECREDUCE_OR [[COPY]](<4 x s32>)
  ; CHECK:   $w0 = COPY [[VECREDUCE_OR]](s32)
  ; CHECK:   RET_ReallyLR implicit $w0
  %res = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %vec)
  ret i32 %res
}
declare i32 @llvm.vector.reduce.xor.v4i32(<4 x i32>)
; Bitwise xor reduction translates to G_VECREDUCE_XOR.
define i32 @xor(<4 x i32> %vec) {
  ; CHECK-LABEL: name: xor
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_XOR:%[0-9]+]]:_(s32) = G_VECREDUCE_XOR [[COPY]](<4 x s32>)
  ; CHECK:   $w0 = COPY [[VECREDUCE_XOR]](s32)
  ; CHECK:   RET_ReallyLR implicit $w0
  %res = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %vec)
  ret i32 %res
}
; Integer min/max reduction intrinsics.
declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)
; Signed max reduction translates to G_VECREDUCE_SMAX.
define i32 @smax(<4 x i32> %vec) {
  ; CHECK-LABEL: name: smax
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_SMAX:%[0-9]+]]:_(s32) = G_VECREDUCE_SMAX [[COPY]](<4 x s32>)
  ; CHECK:   $w0 = COPY [[VECREDUCE_SMAX]](s32)
  ; CHECK:   RET_ReallyLR implicit $w0
  %res = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %vec)
  ret i32 %res
}
; Signed min reduction translates to G_VECREDUCE_SMIN.
define i32 @smin(<4 x i32> %vec) {
  ; CHECK-LABEL: name: smin
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_SMIN:%[0-9]+]]:_(s32) = G_VECREDUCE_SMIN [[COPY]](<4 x s32>)
  ; CHECK:   $w0 = COPY [[VECREDUCE_SMIN]](s32)
  ; CHECK:   RET_ReallyLR implicit $w0
  %res = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %vec)
  ret i32 %res
}
; Unsigned max reduction translates to G_VECREDUCE_UMAX.
define i32 @umax(<4 x i32> %vec) {
  ; CHECK-LABEL: name: umax
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_UMAX:%[0-9]+]]:_(s32) = G_VECREDUCE_UMAX [[COPY]](<4 x s32>)
  ; CHECK:   $w0 = COPY [[VECREDUCE_UMAX]](s32)
  ; CHECK:   RET_ReallyLR implicit $w0
  %res = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %vec)
  ret i32 %res
}
; Unsigned min reduction translates to G_VECREDUCE_UMIN.
; NOTE(review): this definition's `ret`/`}` fall past the end of this chunk, so
; they are intentionally not emitted here.
define i32 @umin(<4 x i32> %vec) {
  ; CHECK-LABEL: name: umin
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_UMIN:%[0-9]+]]:_(s32) = G_VECREDUCE_UMIN [[COPY]](<4 x s32>)
  ; CHECK:   $w0 = COPY [[VECREDUCE_UMIN]](s32)
  ; CHECK:   RET_ReallyLR implicit $w0
  %res = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %vec)