1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s
; Declarations of the integer vector-reduction intrinsics called by the tests
; in this file: signed/unsigned max and min, each over <16 x i8>, <8 x i16>
; and <4 x i32>, returning a scalar of the element type.
4 declare i8 @llvm.vector.reduce.smax.v16i8(<16 x i8>)
5 declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>)
6 declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)
7 declare i8 @llvm.vector.reduce.umax.v16i8(<16 x i8>)
8 declare i16 @llvm.vector.reduce.umax.v8i16(<8 x i16>)
9 declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)
10 declare i8 @llvm.vector.reduce.smin.v16i8(<16 x i8>)
11 declare i16 @llvm.vector.reduce.smin.v8i16(<8 x i16>)
12 declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)
13 declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>)
14 declare i16 @llvm.vector.reduce.umin.v8i16(<8 x i16>)
15 declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)
; Signed-max reduction of <16 x i8> with no scalar operand.
; Expected code: load r0 with the bitwise NOT of 127 (i.e. -128, the i8
; minimum), then issue vmaxv.s8 r0, q0.
17 define arm_aapcs_vfpcc i8 @vmaxv_s_v16i8(<16 x i8> %s1) {
18 ; CHECK-LABEL: vmaxv_s_v16i8:
20 ; CHECK-NEXT: mvn r0, #127
21 ; CHECK-NEXT: vmaxv.s8 r0, q0
23 %r = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %s1)
; Signed-max reduction of <8 x i16> with no scalar operand.
; Expected code: build 0xFFFF8000 (-32768, the i16 minimum) in r0 with a
; movw/movt pair, then issue vmaxv.s16 r0, q0.
28 define arm_aapcs_vfpcc i16 @vmaxv_s_v8i16(<8 x i16> %s1) {
29 ; CHECK-LABEL: vmaxv_s_v8i16:
31 ; CHECK-NEXT: movw r0, #32768
32 ; CHECK-NEXT: movt r0, #65535
33 ; CHECK-NEXT: vmaxv.s16 r0, q0
35 %r = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %s1)
; Signed-max reduction of <4 x i32> with no scalar operand.
; Expected code: load r0 with -2147483648 (the i32 minimum), then issue
; vmaxv.s32 r0, q0.
39 define arm_aapcs_vfpcc i32 @vmaxv_s_v4i32(<4 x i32> %s1) {
40 ; CHECK-LABEL: vmaxv_s_v4i32:
42 ; CHECK-NEXT: mov.w r0, #-2147483648
43 ; CHECK-NEXT: vmaxv.s32 r0, q0
45 %r = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %s1)
; Unsigned-max reduction of <16 x i8> with no scalar operand.
; Expected code: zero r0 (the unsigned minimum), then issue vmaxv.u8 r0, q0.
50 define arm_aapcs_vfpcc i8 @vmaxv_u_v16i8(<16 x i8> %s1) {
51 ; CHECK-LABEL: vmaxv_u_v16i8:
53 ; CHECK-NEXT: movs r0, #0
54 ; CHECK-NEXT: vmaxv.u8 r0, q0
56 %r = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %s1)
; Unsigned-max reduction of <8 x i16> with no scalar operand.
; Expected code: zero r0 (the unsigned minimum), then issue vmaxv.u16 r0, q0.
60 define arm_aapcs_vfpcc i16 @vmaxv_u_v8i16(<8 x i16> %s1) {
61 ; CHECK-LABEL: vmaxv_u_v8i16:
63 ; CHECK-NEXT: movs r0, #0
64 ; CHECK-NEXT: vmaxv.u16 r0, q0
66 %r = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %s1)
; Unsigned-max reduction of <4 x i32> with no scalar operand.
; Expected code: zero r0 (the unsigned minimum), then issue vmaxv.u32 r0, q0.
70 define arm_aapcs_vfpcc i32 @vmaxv_u_v4i32(<4 x i32> %s1) {
71 ; CHECK-LABEL: vmaxv_u_v4i32:
73 ; CHECK-NEXT: movs r0, #0
74 ; CHECK-NEXT: vmaxv.u32 r0, q0
76 %r = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %s1)
; Signed-min reduction of <16 x i8> with no scalar operand.
; Expected code: load r0 with 127 (the i8 maximum), then issue
; vminv.s8 r0, q0.
80 define arm_aapcs_vfpcc i8 @vminv_s_v16i8(<16 x i8> %s1) {
81 ; CHECK-LABEL: vminv_s_v16i8:
83 ; CHECK-NEXT: movs r0, #127
84 ; CHECK-NEXT: vminv.s8 r0, q0
86 %r = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %s1)
; Signed-min reduction of <8 x i16> with no scalar operand.
; Expected code: load r0 with 32767 (the i16 maximum), then issue
; vminv.s16 r0, q0.
90 define arm_aapcs_vfpcc i16 @vminv_s_v8i16(<8 x i16> %s1) {
91 ; CHECK-LABEL: vminv_s_v8i16:
93 ; CHECK-NEXT: movw r0, #32767
94 ; CHECK-NEXT: vminv.s16 r0, q0
96 %r = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %s1)
; Signed-min reduction of <4 x i32> with no scalar operand.
; Expected code: load r0 with the bitwise NOT of -2147483648 (0x7FFFFFFF, the
; i32 maximum), then issue vminv.s32 r0, q0.
100 define arm_aapcs_vfpcc i32 @vminv_s_v4i32(<4 x i32> %s1) {
101 ; CHECK-LABEL: vminv_s_v4i32:
103 ; CHECK-NEXT: mvn r0, #-2147483648
104 ; CHECK-NEXT: vminv.s32 r0, q0
106 %r = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %s1)
; Unsigned-min reduction of <16 x i8> with no scalar operand.
; Expected code: load r0 with 255 (the u8 maximum), then issue
; vminv.u8 r0, q0.
110 define arm_aapcs_vfpcc i8 @vminv_u_v16i8(<16 x i8> %s1) {
111 ; CHECK-LABEL: vminv_u_v16i8:
113 ; CHECK-NEXT: movs r0, #255
114 ; CHECK-NEXT: vminv.u8 r0, q0
116 %r = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %s1)
; Unsigned-min reduction of <8 x i16> with no scalar operand.
; Expected code: load r0 with 65535 (the u16 maximum), then issue
; vminv.u16 r0, q0.
120 define arm_aapcs_vfpcc i16 @vminv_u_v8i16(<8 x i16> %s1) {
121 ; CHECK-LABEL: vminv_u_v8i16:
123 ; CHECK-NEXT: movw r0, #65535
124 ; CHECK-NEXT: vminv.u16 r0, q0
126 %r = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %s1)
; Unsigned-min reduction of <4 x i32> with no scalar operand.
; Expected code: load r0 with -1 (all ones, the u32 maximum), then issue
; vminv.u32 r0, q0.
130 define arm_aapcs_vfpcc i32 @vminv_u_v4i32(<4 x i32> %s1) {
131 ; CHECK-LABEL: vminv_u_v4i32:
133 ; CHECK-NEXT: mov.w r0, #-1
134 ; CHECK-NEXT: vminv.u32 r0, q0
136 %r = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %s1)
; Signed-max reduction of <16 x i8> followed by a signed max with the i8
; scalar %s2 (icmp sgt + select). The visible CHECK lines expect only
; vmaxv.s8 r0, q0: no constant load and no separate compare/select.
140 define arm_aapcs_vfpcc i8 @vmaxv_s_v16i8_i8(<16 x i8> %s1, i8 %s2) {
141 ; CHECK-LABEL: vmaxv_s_v16i8_i8:
143 ; CHECK-NEXT: vmaxv.s8 r0, q0
145 %r = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %s1)
146 %c = icmp sgt i8 %r, %s2
147 %s = select i1 %c, i8 %r, i8 %s2
; Signed-max reduction of <16 x i8>, sign-extended to i32 and then max'ed
; (icmp sgt + select) with the i32 scalar %s2. The scalar is wider than the
; element type, and the expected code keeps the steps separate: seed r1 with
; -128, vmaxv.s8 into r1, sxtb, then cmp + csel with condition gt.
151 define arm_aapcs_vfpcc i32 @vmaxv_s_v16i8_i32(<16 x i8> %s1, i32 %s2) {
152 ; CHECK-LABEL: vmaxv_s_v16i8_i32:
154 ; CHECK-NEXT: mvn r1, #127
155 ; CHECK-NEXT: vmaxv.s8 r1, q0
156 ; CHECK-NEXT: sxtb r1, r1
157 ; CHECK-NEXT: cmp r1, r0
158 ; CHECK-NEXT: csel r0, r1, r0, gt
160 %r = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %s1)
161 %rs = sext i8 %r to i32
162 %c = icmp sgt i32 %rs, %s2
163 %s = select i1 %c, i32 %rs, i32 %s2
; Signed-max reduction of <8 x i16> followed by a signed max with the i16
; scalar %s2 (icmp sgt + select). The visible CHECK lines expect only
; vmaxv.s16 r0, q0: no constant load and no separate compare/select.
167 define arm_aapcs_vfpcc i16 @vmaxv_s_v8i16_i16(<8 x i16> %s1, i16 %s2) {
168 ; CHECK-LABEL: vmaxv_s_v8i16_i16:
170 ; CHECK-NEXT: vmaxv.s16 r0, q0
172 %r = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %s1)
173 %c = icmp sgt i16 %r, %s2
174 %s = select i1 %c, i16 %r, i16 %s2
; Signed-max reduction of <8 x i16>, sign-extended to i32 and then max'ed
; (icmp sgt + select) with the i32 scalar %s2. Expected code: seed r1 with
; 0xFFFF8000 via movw/movt, vmaxv.s16 into r1, sxth, then cmp + csel with
; condition gt.
178 define arm_aapcs_vfpcc i32 @vmaxv_s_v8i16_i32(<8 x i16> %s1, i32 %s2) {
179 ; CHECK-LABEL: vmaxv_s_v8i16_i32:
181 ; CHECK-NEXT: movw r1, #32768
182 ; CHECK-NEXT: movt r1, #65535
183 ; CHECK-NEXT: vmaxv.s16 r1, q0
184 ; CHECK-NEXT: sxth r1, r1
185 ; CHECK-NEXT: cmp r1, r0
186 ; CHECK-NEXT: csel r0, r1, r0, gt
188 %r = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %s1)
189 %rs = sext i16 %r to i32
190 %c = icmp sgt i32 %rs, %s2
191 %s = select i1 %c, i32 %rs, i32 %s2
; Signed-max reduction of <4 x i32> followed by a signed max with the i32
; scalar %s2 (icmp sgt + select). The visible CHECK lines expect only
; vmaxv.s32 r0, q0: no constant load and no separate compare/select.
195 define arm_aapcs_vfpcc i32 @vmaxv_s_v4i32_i32(<4 x i32> %s1, i32 %s2) {
196 ; CHECK-LABEL: vmaxv_s_v4i32_i32:
198 ; CHECK-NEXT: vmaxv.s32 r0, q0
200 %r = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %s1)
201 %c = icmp sgt i32 %r, %s2
202 %s = select i1 %c, i32 %r, i32 %s2
; Unsigned-max reduction of <16 x i8> followed by an unsigned max with the i8
; scalar %s2 (icmp ugt + select). The visible CHECK lines expect only
; vmaxv.u8 r0, q0: no constant load and no separate compare/select.
206 define arm_aapcs_vfpcc i8 @vmaxv_u_v16i8_i8(<16 x i8> %s1, i8 %s2) {
207 ; CHECK-LABEL: vmaxv_u_v16i8_i8:
209 ; CHECK-NEXT: vmaxv.u8 r0, q0
211 %r = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %s1)
212 %c = icmp ugt i8 %r, %s2
213 %s = select i1 %c, i8 %r, i8 %s2
; Unsigned-max reduction of <16 x i8>, zero-extended to i32 and then max'ed
; (icmp ugt + select) with the i32 scalar %s2. Expected code: zero r1,
; vmaxv.u8 into r1, uxtb, then cmp + csel with condition hi.
217 define arm_aapcs_vfpcc i32 @vmaxv_u_v16i8_i32(<16 x i8> %s1, i32 %s2) {
218 ; CHECK-LABEL: vmaxv_u_v16i8_i32:
220 ; CHECK-NEXT: movs r1, #0
221 ; CHECK-NEXT: vmaxv.u8 r1, q0
222 ; CHECK-NEXT: uxtb r1, r1
223 ; CHECK-NEXT: cmp r1, r0
224 ; CHECK-NEXT: csel r0, r1, r0, hi
226 %r = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %s1)
227 %rs = zext i8 %r to i32
228 %c = icmp ugt i32 %rs, %s2
229 %s = select i1 %c, i32 %rs, i32 %s2
; Unsigned-max reduction of <8 x i16> followed by an unsigned max with the
; i16 scalar %s2 (icmp ugt + select). The visible CHECK lines expect only
; vmaxv.u16 r0, q0: no constant load and no separate compare/select.
233 define arm_aapcs_vfpcc i16 @vmaxv_u_v8i16_i16(<8 x i16> %s1, i16 %s2) {
234 ; CHECK-LABEL: vmaxv_u_v8i16_i16:
236 ; CHECK-NEXT: vmaxv.u16 r0, q0
238 %r = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %s1)
239 %c = icmp ugt i16 %r, %s2
240 %s = select i1 %c, i16 %r, i16 %s2
; Unsigned-max reduction of <8 x i16>, zero-extended to i32 and then max'ed
; (icmp ugt + select) with the i32 scalar %s2. Expected code: zero r1,
; vmaxv.u16 into r1, uxth, then cmp + csel with condition hi.
244 define arm_aapcs_vfpcc i32 @vmaxv_u_v8i16_i32(<8 x i16> %s1, i32 %s2) {
245 ; CHECK-LABEL: vmaxv_u_v8i16_i32:
247 ; CHECK-NEXT: movs r1, #0
248 ; CHECK-NEXT: vmaxv.u16 r1, q0
249 ; CHECK-NEXT: uxth r1, r1
250 ; CHECK-NEXT: cmp r1, r0
251 ; CHECK-NEXT: csel r0, r1, r0, hi
253 %r = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %s1)
254 %rs = zext i16 %r to i32
255 %c = icmp ugt i32 %rs, %s2
256 %s = select i1 %c, i32 %rs, i32 %s2
; Unsigned-max reduction of <4 x i32> followed by an unsigned max with the
; i32 scalar %s2 (icmp ugt + select). The visible CHECK lines expect only
; vmaxv.u32 r0, q0: no constant load and no separate compare/select.
260 define arm_aapcs_vfpcc i32 @vmaxv_u_v4i32_i32(<4 x i32> %s1, i32 %s2) {
261 ; CHECK-LABEL: vmaxv_u_v4i32_i32:
263 ; CHECK-NEXT: vmaxv.u32 r0, q0
265 %r = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %s1)
266 %c = icmp ugt i32 %r, %s2
267 %s = select i1 %c, i32 %r, i32 %s2
; Signed-min reduction of <16 x i8> followed by a signed min with the i8
; scalar %s2 (icmp slt + select). The visible CHECK lines expect only
; vminv.s8 r0, q0: no constant load and no separate compare/select.
271 define arm_aapcs_vfpcc i8 @vminv_s_v16i8_i8(<16 x i8> %s1, i8 %s2) {
272 ; CHECK-LABEL: vminv_s_v16i8_i8:
274 ; CHECK-NEXT: vminv.s8 r0, q0
276 %r = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %s1)
277 %c = icmp slt i8 %r, %s2
278 %s = select i1 %c, i8 %r, i8 %s2
; Signed-min reduction of <16 x i8>, sign-extended to i32 and then min'ed
; (icmp slt + select) with the i32 scalar %s2. Expected code: seed r1 with
; 127, vminv.s8 into r1, sxtb, then cmp + csel with condition lt.
282 define arm_aapcs_vfpcc i32 @vminv_s_v16i8_i32(<16 x i8> %s1, i32 %s2) {
283 ; CHECK-LABEL: vminv_s_v16i8_i32:
285 ; CHECK-NEXT: movs r1, #127
286 ; CHECK-NEXT: vminv.s8 r1, q0
287 ; CHECK-NEXT: sxtb r1, r1
288 ; CHECK-NEXT: cmp r1, r0
289 ; CHECK-NEXT: csel r0, r1, r0, lt
291 %r = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %s1)
292 %rs = sext i8 %r to i32
293 %c = icmp slt i32 %rs, %s2
294 %s = select i1 %c, i32 %rs, i32 %s2
; Signed-min reduction of <8 x i16> followed by a signed min with the i16
; scalar %s2 (icmp slt + select). The visible CHECK lines expect only
; vminv.s16 r0, q0: no constant load and no separate compare/select.
298 define arm_aapcs_vfpcc i16 @vminv_s_v8i16_i16(<8 x i16> %s1, i16 %s2) {
299 ; CHECK-LABEL: vminv_s_v8i16_i16:
301 ; CHECK-NEXT: vminv.s16 r0, q0
303 %r = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %s1)
304 %c = icmp slt i16 %r, %s2
305 %s = select i1 %c, i16 %r, i16 %s2
; Signed-min reduction of <8 x i16>, sign-extended to i32 and then min'ed
; (icmp slt + select) with the i32 scalar %s2. Expected code: seed r1 with
; 32767, vminv.s16 into r1, sxth, then cmp + csel with condition lt.
309 define arm_aapcs_vfpcc i32 @vminv_s_v8i16_i32(<8 x i16> %s1, i32 %s2) {
310 ; CHECK-LABEL: vminv_s_v8i16_i32:
312 ; CHECK-NEXT: movw r1, #32767
313 ; CHECK-NEXT: vminv.s16 r1, q0
314 ; CHECK-NEXT: sxth r1, r1
315 ; CHECK-NEXT: cmp r1, r0
316 ; CHECK-NEXT: csel r0, r1, r0, lt
318 %r = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %s1)
319 %rs = sext i16 %r to i32
320 %c = icmp slt i32 %rs, %s2
321 %s = select i1 %c, i32 %rs, i32 %s2
; Signed-min reduction of <4 x i32> followed by a signed min with the i32
; scalar %s2 (icmp slt + select). The visible CHECK lines expect only
; vminv.s32 r0, q0: no constant load and no separate compare/select.
325 define arm_aapcs_vfpcc i32 @vminv_s_v4i32_i32(<4 x i32> %s1, i32 %s2) {
326 ; CHECK-LABEL: vminv_s_v4i32_i32:
328 ; CHECK-NEXT: vminv.s32 r0, q0
330 %r = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %s1)
331 %c = icmp slt i32 %r, %s2
332 %s = select i1 %c, i32 %r, i32 %s2
; Unsigned-min reduction of <16 x i8> followed by an unsigned min with the i8
; scalar %s2 (icmp ult + select). The visible CHECK lines expect only
; vminv.u8 r0, q0: no constant load and no separate compare/select.
336 define arm_aapcs_vfpcc i8 @vminv_u_v16i8_i8(<16 x i8> %s1, i8 %s2) {
337 ; CHECK-LABEL: vminv_u_v16i8_i8:
339 ; CHECK-NEXT: vminv.u8 r0, q0
341 %r = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %s1)
342 %c = icmp ult i8 %r, %s2
343 %s = select i1 %c, i8 %r, i8 %s2
; Unsigned-min reduction of <16 x i8>, zero-extended to i32 and then min'ed
; (icmp ult + select) with the i32 scalar %s2. Expected code: seed r1 with
; 255, vminv.u8 into r1, uxtb, then cmp + csel with condition lo.
347 define arm_aapcs_vfpcc i32 @vminv_u_v16i8_i32(<16 x i8> %s1, i32 %s2) {
348 ; CHECK-LABEL: vminv_u_v16i8_i32:
350 ; CHECK-NEXT: movs r1, #255
351 ; CHECK-NEXT: vminv.u8 r1, q0
352 ; CHECK-NEXT: uxtb r1, r1
353 ; CHECK-NEXT: cmp r1, r0
354 ; CHECK-NEXT: csel r0, r1, r0, lo
356 %r = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %s1)
357 %rs = zext i8 %r to i32
358 %c = icmp ult i32 %rs, %s2
359 %s = select i1 %c, i32 %rs, i32 %s2
; Unsigned-min reduction of <8 x i16> followed by an unsigned min with the
; i16 scalar %s2 (icmp ult + select). The visible CHECK lines expect only
; vminv.u16 r0, q0: no constant load and no separate compare/select.
363 define arm_aapcs_vfpcc i16 @vminv_u_v8i16_i16(<8 x i16> %s1, i16 %s2) {
364 ; CHECK-LABEL: vminv_u_v8i16_i16:
366 ; CHECK-NEXT: vminv.u16 r0, q0
368 %r = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %s1)
369 %c = icmp ult i16 %r, %s2
370 %s = select i1 %c, i16 %r, i16 %s2
; Unsigned-min reduction of <8 x i16>, zero-extended to i32 and then min'ed
; (icmp ult + select) with the i32 scalar %s2. Expected code: seed r1 with
; 65535, vminv.u16 into r1, uxth, then cmp + csel with condition lo.
374 define arm_aapcs_vfpcc i32 @vminv_u_v8i16_i32(<8 x i16> %s1, i32 %s2) {
375 ; CHECK-LABEL: vminv_u_v8i16_i32:
377 ; CHECK-NEXT: movw r1, #65535
378 ; CHECK-NEXT: vminv.u16 r1, q0
379 ; CHECK-NEXT: uxth r1, r1
380 ; CHECK-NEXT: cmp r1, r0
381 ; CHECK-NEXT: csel r0, r1, r0, lo
383 %r = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %s1)
384 %rs = zext i16 %r to i32
385 %c = icmp ult i32 %rs, %s2
386 %s = select i1 %c, i32 %rs, i32 %s2
; Unsigned-min reduction of <4 x i32> followed by an unsigned min with the
; i32 scalar %s2 (icmp ult + select). The visible CHECK lines expect only
; vminv.u32 r0, q0: no constant load and no separate compare/select.
390 define arm_aapcs_vfpcc i32 @vminv_u_v4i32_i32(<4 x i32> %s1, i32 %s2) {
391 ; CHECK-LABEL: vminv_u_v4i32_i32:
393 ; CHECK-NEXT: vminv.u32 r0, q0
395 %r = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %s1)
396 %c = icmp ult i32 %r, %s2
397 %s = select i1 %c, i32 %r, i32 %s2