1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve --verify-machineinstrs %s -o - | FileCheck %s
; Scalar signed max of two i8 values (llvm.smax.i8).
; Expected assembly: both operands are sign-extended with sxtb, compared, and
; the result is picked with csel using the gt condition.
; The label directive anchors the assertions below to this function's output,
; as in every other test in this file.
4 declare i8 @llvm.smax.i8(i8 %a, i8 %b) readnone
6 define arm_aapcs_vfpcc i8 @smaxi8(i8 %a, i8 %b) {
7 ; CHECK-LABEL: smaxi8:
9 ; CHECK-NEXT: sxtb r1, r1
10 ; CHECK-NEXT: sxtb r0, r0
11 ; CHECK-NEXT: cmp r0, r1
12 ; CHECK-NEXT: csel r0, r0, r1, gt
14 %c = call i8 @llvm.smax.i8(i8 %a, i8 %b)
; Scalar signed max of two i16 values (llvm.smax.i16).
; Expected assembly: sxth on both operands, cmp, then csel with gt.
18 declare i16 @llvm.smax.i16(i16 %a, i16 %b) readnone
20 define arm_aapcs_vfpcc i16 @smaxi16(i16 %a, i16 %b) {
21 ; CHECK-LABEL: smaxi16:
23 ; CHECK-NEXT: sxth r1, r1
24 ; CHECK-NEXT: sxth r0, r0
25 ; CHECK-NEXT: cmp r0, r1
26 ; CHECK-NEXT: csel r0, r0, r1, gt
28 %c = call i16 @llvm.smax.i16(i16 %a, i16 %b)
; Scalar signed max of two i32 values (llvm.smax.i32).
; Expected assembly: a plain cmp followed by csel with gt; no extension step.
32 declare i32 @llvm.smax.i32(i32 %a, i32 %b) readnone
34 define arm_aapcs_vfpcc i32 @smaxi32(i32 %a, i32 %b) {
35 ; CHECK-LABEL: smaxi32:
37 ; CHECK-NEXT: cmp r0, r1
38 ; CHECK-NEXT: csel r0, r0, r1, gt
40 %c = call i32 @llvm.smax.i32(i32 %a, i32 %b)
; Scalar signed max of two i64 values (llvm.smax.i64).
; Expected assembly: a subs/sbcs pair (written to r12) sets the flags, then one
; csel per 32-bit register of the result, both using the lt condition.
44 declare i64 @llvm.smax.i64(i64 %a, i64 %b) readnone
46 define arm_aapcs_vfpcc i64 @smaxi64(i64 %a, i64 %b) {
47 ; CHECK-LABEL: smaxi64:
49 ; CHECK-NEXT: subs.w r12, r2, r0
50 ; CHECK-NEXT: sbcs.w r12, r3, r1
51 ; CHECK-NEXT: csel r0, r0, r2, lt
52 ; CHECK-NEXT: csel r1, r1, r3, lt
54 %c = call i64 @llvm.smax.i64(i64 %a, i64 %b)
; Signed max of two <8 x i8> vectors (llvm.smax.v8i8).
; Expected assembly: both operands go through vmovlb.s8, then a single vmax.s16.
58 declare <8 x i8> @llvm.smax.v8i8(<8 x i8> %a, <8 x i8> %b) readnone
60 define arm_aapcs_vfpcc <8 x i8> @smax8i8(<8 x i8> %a, <8 x i8> %b) {
61 ; CHECK-LABEL: smax8i8:
63 ; CHECK-NEXT: vmovlb.s8 q1, q1
64 ; CHECK-NEXT: vmovlb.s8 q0, q0
65 ; CHECK-NEXT: vmax.s16 q0, q0, q1
67 %c = call <8 x i8> @llvm.smax.v8i8(<8 x i8> %a, <8 x i8> %b)
; Signed max of two <16 x i8> vectors (llvm.smax.v16i8).
; Expected assembly: a single vmax.s8 on q0 and q1.
71 declare <16 x i8> @llvm.smax.v16i8(<16 x i8> %a, <16 x i8> %b) readnone
73 define arm_aapcs_vfpcc <16 x i8> @smax16i8(<16 x i8> %a, <16 x i8> %b) {
74 ; CHECK-LABEL: smax16i8:
76 ; CHECK-NEXT: vmax.s8 q0, q0, q1
78 %c = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %a, <16 x i8> %b)
; Signed max of two <32 x i8> vectors (llvm.smax.v32i8); the result is stored to %p.
; Expected assembly: two vmax.s8 instructions (q1/q3 and q0/q2) followed by two
; vstrw.32 stores at [r0, #16] and [r0].
82 declare <32 x i8> @llvm.smax.v32i8(<32 x i8> %a, <32 x i8> %b) readnone
84 define arm_aapcs_vfpcc void @smax32i8(<32 x i8> %a, <32 x i8> %b, ptr %p) {
85 ; CHECK-LABEL: smax32i8:
87 ; CHECK-NEXT: vmax.s8 q1, q1, q3
88 ; CHECK-NEXT: vmax.s8 q0, q0, q2
89 ; CHECK-NEXT: vstrw.32 q1, [r0, #16]
90 ; CHECK-NEXT: vstrw.32 q0, [r0]
92 %c = call <32 x i8> @llvm.smax.v32i8(<32 x i8> %a, <32 x i8> %b)
93 store <32 x i8> %c, ptr %p
; Signed max of two <4 x i16> vectors (llvm.smax.v4i16).
; Expected assembly: both operands go through vmovlb.s16, then a single vmax.s32.
97 declare <4 x i16> @llvm.smax.v4i16(<4 x i16> %a, <4 x i16> %b) readnone
99 define arm_aapcs_vfpcc <4 x i16> @smax4i16(<4 x i16> %a, <4 x i16> %b) {
100 ; CHECK-LABEL: smax4i16:
102 ; CHECK-NEXT: vmovlb.s16 q1, q1
103 ; CHECK-NEXT: vmovlb.s16 q0, q0
104 ; CHECK-NEXT: vmax.s32 q0, q0, q1
106 %c = call <4 x i16> @llvm.smax.v4i16(<4 x i16> %a, <4 x i16> %b)
; Signed max of two <8 x i16> vectors (llvm.smax.v8i16).
; Expected assembly: a single vmax.s16 on q0 and q1.
110 declare <8 x i16> @llvm.smax.v8i16(<8 x i16> %a, <8 x i16> %b) readnone
112 define arm_aapcs_vfpcc <8 x i16> @smax8i16(<8 x i16> %a, <8 x i16> %b) {
113 ; CHECK-LABEL: smax8i16:
115 ; CHECK-NEXT: vmax.s16 q0, q0, q1
117 %c = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %a, <8 x i16> %b)
; Signed max of two <16 x i16> vectors (llvm.smax.v16i16); the result is stored to %p.
; Expected assembly: two vmax.s16 instructions (q1/q3 and q0/q2) followed by two
; vstrw.32 stores at [r0, #16] and [r0].
121 declare <16 x i16> @llvm.smax.v16i16(<16 x i16> %a, <16 x i16> %b) readnone
123 define arm_aapcs_vfpcc void @smax16i16(<16 x i16> %a, <16 x i16> %b, ptr %p) {
124 ; CHECK-LABEL: smax16i16:
126 ; CHECK-NEXT: vmax.s16 q1, q1, q3
127 ; CHECK-NEXT: vmax.s16 q0, q0, q2
128 ; CHECK-NEXT: vstrw.32 q1, [r0, #16]
129 ; CHECK-NEXT: vstrw.32 q0, [r0]
131 %c = call <16 x i16> @llvm.smax.v16i16(<16 x i16> %a, <16 x i16> %b)
132 store <16 x i16> %c, ptr %p
; Signed max of two <2 x i32> vectors (llvm.smax.v2i32).
; No vmax appears in the expected assembly. Instead, elements are moved to
; general-purpose registers, paired with their sign words (asr #31), and
; compared with subs/sbcs. Each csetm lt result is inserted into a mask with
; bfi (bit offsets #0 and #8), the mask is written to p0 with vmsr, and vpsel
; picks the result. r4, r5, r7 and lr are saved and restored around the body.
136 declare <2 x i32> @llvm.smax.v2i32(<2 x i32> %a, <2 x i32> %b) readnone
138 define arm_aapcs_vfpcc <2 x i32> @smax2i32(<2 x i32> %a, <2 x i32> %b) {
139 ; CHECK-LABEL: smax2i32:
141 ; CHECK-NEXT: .save {r4, r5, r7, lr}
142 ; CHECK-NEXT: push {r4, r5, r7, lr}
143 ; CHECK-NEXT: vmov r1, s4
144 ; CHECK-NEXT: vmov r3, s0
145 ; CHECK-NEXT: vmov r0, s6
146 ; CHECK-NEXT: vmov r2, s2
147 ; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
148 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
149 ; CHECK-NEXT: asr.w lr, r1, #31
150 ; CHECK-NEXT: subs r1, r1, r3
151 ; CHECK-NEXT: sbcs.w r1, lr, r3, asr #31
152 ; CHECK-NEXT: asr.w r5, r3, #31
153 ; CHECK-NEXT: asr.w r12, r0, #31
154 ; CHECK-NEXT: csetm r1, lt
155 ; CHECK-NEXT: subs r0, r0, r2
156 ; CHECK-NEXT: mov.w r3, #0
157 ; CHECK-NEXT: sbcs.w r0, r12, r2, asr #31
158 ; CHECK-NEXT: bfi r3, r1, #0, #8
159 ; CHECK-NEXT: csetm r0, lt
160 ; CHECK-NEXT: asrs r4, r2, #31
161 ; CHECK-NEXT: bfi r3, r0, #8, #8
162 ; CHECK-NEXT: vmov q1[3], q1[1], lr, r12
163 ; CHECK-NEXT: vmov q0[3], q0[1], r5, r4
164 ; CHECK-NEXT: vmsr p0, r3
165 ; CHECK-NEXT: vpsel q0, q0, q1
166 ; CHECK-NEXT: pop {r4, r5, r7, pc}
167 %c = call <2 x i32> @llvm.smax.v2i32(<2 x i32> %a, <2 x i32> %b)
; Signed max of two <4 x i32> vectors (llvm.smax.v4i32).
; Expected assembly: a single vmax.s32 on q0 and q1.
171 declare <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> %b) readnone
173 define arm_aapcs_vfpcc <4 x i32> @smax4i32(<4 x i32> %a, <4 x i32> %b) {
174 ; CHECK-LABEL: smax4i32:
176 ; CHECK-NEXT: vmax.s32 q0, q0, q1
178 %c = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> %b)
; Signed max of two <8 x i32> vectors (llvm.smax.v8i32); the result is stored to %p.
; Expected assembly: two vmax.s32 instructions (q1/q3 and q0/q2) followed by two
; vstrw.32 stores at [r0, #16] and [r0].
182 declare <8 x i32> @llvm.smax.v8i32(<8 x i32> %a, <8 x i32> %b) readnone
184 define arm_aapcs_vfpcc void @smax8i32(<8 x i32> %a, <8 x i32> %b, ptr %p) {
185 ; CHECK-LABEL: smax8i32:
187 ; CHECK-NEXT: vmax.s32 q1, q1, q3
188 ; CHECK-NEXT: vmax.s32 q0, q0, q2
189 ; CHECK-NEXT: vstrw.32 q1, [r0, #16]
190 ; CHECK-NEXT: vstrw.32 q0, [r0]
192 %c = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %a, <8 x i32> %b)
193 store <8 x i32> %c, ptr %p
; Signed max of two <1 x i64> vectors (llvm.smax.v1i64).
; Expected assembly: the subs/sbcs + two csel (lt) sequence operates on r0-r3,
; wrapped in an 8-byte stack adjustment (.pad #8 / sub sp / add sp).
197 declare <1 x i64> @llvm.smax.v1i64(<1 x i64> %a, <1 x i64> %b) readnone
199 define arm_aapcs_vfpcc <1 x i64> @smax1i64(<1 x i64> %a, <1 x i64> %b) {
200 ; CHECK-LABEL: smax1i64:
202 ; CHECK-NEXT: .pad #8
203 ; CHECK-NEXT: sub sp, #8
204 ; CHECK-NEXT: subs.w r12, r2, r0
205 ; CHECK-NEXT: sbcs.w r12, r3, r1
206 ; CHECK-NEXT: csel r0, r0, r2, lt
207 ; CHECK-NEXT: csel r1, r1, r3, lt
208 ; CHECK-NEXT: add sp, #8
210 %c = call <1 x i64> @llvm.smax.v1i64(<1 x i64> %a, <1 x i64> %b)
; Signed max of two <2 x i64> vectors (llvm.smax.v2i64).
; Expected assembly: each d-register of the operands is moved into a GPR pair
; and compared with subs/sbcs. The csetm lt results are inserted into r1 with
; bfi (bit offsets #0 and #8), r1 is written to p0 with vmsr, and vpsel picks
; between q0 and q1.
214 declare <2 x i64> @llvm.smax.v2i64(<2 x i64> %a, <2 x i64> %b) readnone
216 define arm_aapcs_vfpcc <2 x i64> @smax2i64(<2 x i64> %a, <2 x i64> %b) {
217 ; CHECK-LABEL: smax2i64:
219 ; CHECK-NEXT: vmov r0, r1, d0
220 ; CHECK-NEXT: vmov r2, r3, d2
221 ; CHECK-NEXT: subs r0, r2, r0
222 ; CHECK-NEXT: sbcs.w r0, r3, r1
223 ; CHECK-NEXT: mov.w r1, #0
224 ; CHECK-NEXT: csetm r0, lt
225 ; CHECK-NEXT: vmov r3, r2, d3
226 ; CHECK-NEXT: bfi r1, r0, #0, #8
227 ; CHECK-NEXT: vmov r0, r12, d1
228 ; CHECK-NEXT: subs r0, r3, r0
229 ; CHECK-NEXT: sbcs.w r0, r2, r12
230 ; CHECK-NEXT: csetm r0, lt
231 ; CHECK-NEXT: bfi r1, r0, #8, #8
232 ; CHECK-NEXT: vmsr p0, r1
233 ; CHECK-NEXT: vpsel q0, q0, q1
235 %c = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %a, <2 x i64> %b)
; Signed max of two <4 x i64> vectors (llvm.smax.v4i64); the result is stored to %p.
; Expected assembly: the subs/sbcs + csetm lt + bfi mask-building sequence is
; run twice, first for q1 vs q3 (mask in r3), then for q0 vs q2 (mask in r1).
; Each mask is written to p0 with vmsr and consumed by a vpsel, whose result is
; stored with vstrw.32 at [r0, #16] and [r0] respectively. r4 and lr are saved
; and restored around the body.
239 declare <4 x i64> @llvm.smax.v4i64(<4 x i64> %a, <4 x i64> %b) readnone
241 define arm_aapcs_vfpcc void @smax4i64(<4 x i64> %a, <4 x i64> %b, ptr %p) {
242 ; CHECK-LABEL: smax4i64:
244 ; CHECK-NEXT: .save {r4, lr}
245 ; CHECK-NEXT: push {r4, lr}
246 ; CHECK-NEXT: vmov r1, r12, d2
247 ; CHECK-NEXT: vmov r3, r2, d6
248 ; CHECK-NEXT: subs r1, r3, r1
249 ; CHECK-NEXT: mov.w r3, #0
250 ; CHECK-NEXT: sbcs.w r1, r2, r12
251 ; CHECK-NEXT: vmov lr, r12, d3
252 ; CHECK-NEXT: csetm r2, lt
253 ; CHECK-NEXT: movs r1, #0
254 ; CHECK-NEXT: bfi r3, r2, #0, #8
255 ; CHECK-NEXT: vmov r2, r4, d7
256 ; CHECK-NEXT: subs.w r2, r2, lr
257 ; CHECK-NEXT: sbcs.w r2, r4, r12
258 ; CHECK-NEXT: csetm r2, lt
259 ; CHECK-NEXT: bfi r3, r2, #8, #8
260 ; CHECK-NEXT: vmov r2, r12, d0
261 ; CHECK-NEXT: vmsr p0, r3
262 ; CHECK-NEXT: vmov r4, r3, d4
263 ; CHECK-NEXT: vpsel q1, q1, q3
264 ; CHECK-NEXT: vstrw.32 q1, [r0, #16]
265 ; CHECK-NEXT: subs r2, r4, r2
266 ; CHECK-NEXT: sbcs.w r2, r3, r12
267 ; CHECK-NEXT: vmov r4, r3, d5
268 ; CHECK-NEXT: csetm r2, lt
269 ; CHECK-NEXT: bfi r1, r2, #0, #8
270 ; CHECK-NEXT: vmov r2, r12, d1
271 ; CHECK-NEXT: subs r2, r4, r2
272 ; CHECK-NEXT: sbcs.w r2, r3, r12
273 ; CHECK-NEXT: csetm r2, lt
274 ; CHECK-NEXT: bfi r1, r2, #8, #8
275 ; CHECK-NEXT: vmsr p0, r1
276 ; CHECK-NEXT: vpsel q0, q0, q2
277 ; CHECK-NEXT: vstrw.32 q0, [r0]
278 ; CHECK-NEXT: pop {r4, pc}
279 %c = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %a, <4 x i64> %b)
280 store <4 x i64> %c, ptr %p
; Scalar unsigned max of two i8 values (llvm.umax.i8).
; Expected assembly: uxtb on both operands, cmp, then csel with hi.
284 declare i8 @llvm.umax.i8(i8 %a, i8 %b) readnone
286 define arm_aapcs_vfpcc i8 @umaxi8(i8 %a, i8 %b) {
287 ; CHECK-LABEL: umaxi8:
289 ; CHECK-NEXT: uxtb r1, r1
290 ; CHECK-NEXT: uxtb r0, r0
291 ; CHECK-NEXT: cmp r0, r1
292 ; CHECK-NEXT: csel r0, r0, r1, hi
294 %c = call i8 @llvm.umax.i8(i8 %a, i8 %b)
; Scalar unsigned max of two i16 values (llvm.umax.i16).
; Expected assembly: uxth on both operands, cmp, then csel with hi.
298 declare i16 @llvm.umax.i16(i16 %a, i16 %b) readnone
300 define arm_aapcs_vfpcc i16 @umaxi16(i16 %a, i16 %b) {
301 ; CHECK-LABEL: umaxi16:
303 ; CHECK-NEXT: uxth r1, r1
304 ; CHECK-NEXT: uxth r0, r0
305 ; CHECK-NEXT: cmp r0, r1
306 ; CHECK-NEXT: csel r0, r0, r1, hi
308 %c = call i16 @llvm.umax.i16(i16 %a, i16 %b)
; Scalar unsigned max of two i32 values (llvm.umax.i32).
; Expected assembly: a plain cmp followed by csel with hi; no extension step.
312 declare i32 @llvm.umax.i32(i32 %a, i32 %b) readnone
314 define arm_aapcs_vfpcc i32 @umaxi32(i32 %a, i32 %b) {
315 ; CHECK-LABEL: umaxi32:
317 ; CHECK-NEXT: cmp r0, r1
318 ; CHECK-NEXT: csel r0, r0, r1, hi
320 %c = call i32 @llvm.umax.i32(i32 %a, i32 %b)
; Scalar unsigned max of two i64 values (llvm.umax.i64).
; Expected assembly: a subs/sbcs pair (written to r12) sets the flags, then one
; csel per 32-bit register of the result, both using the lo condition.
324 declare i64 @llvm.umax.i64(i64 %a, i64 %b) readnone
326 define arm_aapcs_vfpcc i64 @umaxi64(i64 %a, i64 %b) {
327 ; CHECK-LABEL: umaxi64:
329 ; CHECK-NEXT: subs.w r12, r2, r0
330 ; CHECK-NEXT: sbcs.w r12, r3, r1
331 ; CHECK-NEXT: csel r0, r0, r2, lo
332 ; CHECK-NEXT: csel r1, r1, r3, lo
334 %c = call i64 @llvm.umax.i64(i64 %a, i64 %b)
; Unsigned max of two <8 x i8> vectors (llvm.umax.v8i8).
; Expected assembly: both operands go through vmovlb.u8, then a single vmax.u16.
338 declare <8 x i8> @llvm.umax.v8i8(<8 x i8> %a, <8 x i8> %b) readnone
340 define arm_aapcs_vfpcc <8 x i8> @umax8i8(<8 x i8> %a, <8 x i8> %b) {
341 ; CHECK-LABEL: umax8i8:
343 ; CHECK-NEXT: vmovlb.u8 q1, q1
344 ; CHECK-NEXT: vmovlb.u8 q0, q0
345 ; CHECK-NEXT: vmax.u16 q0, q0, q1
347 %c = call <8 x i8> @llvm.umax.v8i8(<8 x i8> %a, <8 x i8> %b)
; Unsigned max of two <16 x i8> vectors (llvm.umax.v16i8).
; Expected assembly: a single vmax.u8 on q0 and q1.
351 declare <16 x i8> @llvm.umax.v16i8(<16 x i8> %a, <16 x i8> %b) readnone
353 define arm_aapcs_vfpcc <16 x i8> @umax16i8(<16 x i8> %a, <16 x i8> %b) {
354 ; CHECK-LABEL: umax16i8:
356 ; CHECK-NEXT: vmax.u8 q0, q0, q1
358 %c = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %a, <16 x i8> %b)
; Unsigned max of two <32 x i8> vectors (llvm.umax.v32i8); the result is stored to %p.
; Expected assembly: two vmax.u8 instructions (q1/q3 and q0/q2) followed by two
; vstrw.32 stores at [r0, #16] and [r0].
362 declare <32 x i8> @llvm.umax.v32i8(<32 x i8> %a, <32 x i8> %b) readnone
364 define arm_aapcs_vfpcc void @umax32i8(<32 x i8> %a, <32 x i8> %b, ptr %p) {
365 ; CHECK-LABEL: umax32i8:
367 ; CHECK-NEXT: vmax.u8 q1, q1, q3
368 ; CHECK-NEXT: vmax.u8 q0, q0, q2
369 ; CHECK-NEXT: vstrw.32 q1, [r0, #16]
370 ; CHECK-NEXT: vstrw.32 q0, [r0]
372 %c = call <32 x i8> @llvm.umax.v32i8(<32 x i8> %a, <32 x i8> %b)
373 store <32 x i8> %c, ptr %p
; Unsigned max of two <4 x i16> vectors (llvm.umax.v4i16).
; Expected assembly: both operands go through vmovlb.u16, then a single vmax.u32.
377 declare <4 x i16> @llvm.umax.v4i16(<4 x i16> %a, <4 x i16> %b) readnone
379 define arm_aapcs_vfpcc <4 x i16> @umax4i16(<4 x i16> %a, <4 x i16> %b) {
380 ; CHECK-LABEL: umax4i16:
382 ; CHECK-NEXT: vmovlb.u16 q1, q1
383 ; CHECK-NEXT: vmovlb.u16 q0, q0
384 ; CHECK-NEXT: vmax.u32 q0, q0, q1
386 %c = call <4 x i16> @llvm.umax.v4i16(<4 x i16> %a, <4 x i16> %b)
; Unsigned max of two <8 x i16> vectors (llvm.umax.v8i16).
; Expected assembly: a single vmax.u16 on q0 and q1.
390 declare <8 x i16> @llvm.umax.v8i16(<8 x i16> %a, <8 x i16> %b) readnone
392 define arm_aapcs_vfpcc <8 x i16> @umax8i16(<8 x i16> %a, <8 x i16> %b) {
393 ; CHECK-LABEL: umax8i16:
395 ; CHECK-NEXT: vmax.u16 q0, q0, q1
397 %c = call <8 x i16> @llvm.umax.v8i16(<8 x i16> %a, <8 x i16> %b)
; Unsigned max of two <16 x i16> vectors (llvm.umax.v16i16); the result is stored to %p.
; Expected assembly: two vmax.u16 instructions (q1/q3 and q0/q2) followed by two
; vstrw.32 stores at [r0, #16] and [r0].
401 declare <16 x i16> @llvm.umax.v16i16(<16 x i16> %a, <16 x i16> %b) readnone
403 define arm_aapcs_vfpcc void @umax16i16(<16 x i16> %a, <16 x i16> %b, ptr %p) {
404 ; CHECK-LABEL: umax16i16:
406 ; CHECK-NEXT: vmax.u16 q1, q1, q3
407 ; CHECK-NEXT: vmax.u16 q0, q0, q2
408 ; CHECK-NEXT: vstrw.32 q1, [r0, #16]
409 ; CHECK-NEXT: vstrw.32 q0, [r0]
411 %c = call <16 x i16> @llvm.umax.v16i16(<16 x i16> %a, <16 x i16> %b)
412 store <16 x i16> %c, ptr %p
; Unsigned max of two <2 x i32> vectors (llvm.umax.v2i32).
; No vmax appears in the expected assembly. Both operands are first ANDed with
; a 0xffffffff-per-64-bit mask (vmov.i64 + vand), then each d-register is moved
; to a GPR pair and compared with subs/sbcs. The csetm lo results are inserted
; into r1 with bfi (bit offsets #0 and #8), written to p0 with vmsr, and
; consumed by vpsel.
416 declare <2 x i32> @llvm.umax.v2i32(<2 x i32> %a, <2 x i32> %b) readnone
418 define arm_aapcs_vfpcc <2 x i32> @umax2i32(<2 x i32> %a, <2 x i32> %b) {
419 ; CHECK-LABEL: umax2i32:
421 ; CHECK-NEXT: vmov.i64 q2, #0xffffffff
422 ; CHECK-NEXT: vand q0, q0, q2
423 ; CHECK-NEXT: vand q1, q1, q2
424 ; CHECK-NEXT: vmov r0, r1, d0
425 ; CHECK-NEXT: vmov r2, r3, d2
426 ; CHECK-NEXT: subs r0, r2, r0
427 ; CHECK-NEXT: sbcs.w r0, r3, r1
428 ; CHECK-NEXT: mov.w r1, #0
429 ; CHECK-NEXT: csetm r0, lo
430 ; CHECK-NEXT: vmov r3, r2, d3
431 ; CHECK-NEXT: bfi r1, r0, #0, #8
432 ; CHECK-NEXT: vmov r0, r12, d1
433 ; CHECK-NEXT: subs r0, r3, r0
434 ; CHECK-NEXT: sbcs.w r0, r2, r12
435 ; CHECK-NEXT: csetm r0, lo
436 ; CHECK-NEXT: bfi r1, r0, #8, #8
437 ; CHECK-NEXT: vmsr p0, r1
438 ; CHECK-NEXT: vpsel q0, q0, q1
440 %c = call <2 x i32> @llvm.umax.v2i32(<2 x i32> %a, <2 x i32> %b)
; Unsigned max of two <4 x i32> vectors (llvm.umax.v4i32).
; Expected assembly: a single vmax.u32 on q0 and q1.
444 declare <4 x i32> @llvm.umax.v4i32(<4 x i32> %a, <4 x i32> %b) readnone
446 define arm_aapcs_vfpcc <4 x i32> @umax4i32(<4 x i32> %a, <4 x i32> %b) {
447 ; CHECK-LABEL: umax4i32:
449 ; CHECK-NEXT: vmax.u32 q0, q0, q1
451 %c = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %a, <4 x i32> %b)
; Unsigned max of two <8 x i32> vectors (llvm.umax.v8i32); the result is stored to %p.
; Expected assembly: two vmax.u32 instructions (q1/q3 and q0/q2) followed by two
; vstrw.32 stores at [r0, #16] and [r0].
455 declare <8 x i32> @llvm.umax.v8i32(<8 x i32> %a, <8 x i32> %b) readnone
457 define arm_aapcs_vfpcc void @umax8i32(<8 x i32> %a, <8 x i32> %b, ptr %p) {
458 ; CHECK-LABEL: umax8i32:
460 ; CHECK-NEXT: vmax.u32 q1, q1, q3
461 ; CHECK-NEXT: vmax.u32 q0, q0, q2
462 ; CHECK-NEXT: vstrw.32 q1, [r0, #16]
463 ; CHECK-NEXT: vstrw.32 q0, [r0]
465 %c = call <8 x i32> @llvm.umax.v8i32(<8 x i32> %a, <8 x i32> %b)
466 store <8 x i32> %c, ptr %p
; Unsigned max of two <1 x i64> vectors (llvm.umax.v1i64).
; Expected assembly: the subs/sbcs + two csel (lo) sequence operates on r0-r3,
; wrapped in an 8-byte stack adjustment (.pad #8 / sub sp / add sp).
470 declare <1 x i64> @llvm.umax.v1i64(<1 x i64> %a, <1 x i64> %b) readnone
472 define arm_aapcs_vfpcc <1 x i64> @umax1i64(<1 x i64> %a, <1 x i64> %b) {
473 ; CHECK-LABEL: umax1i64:
475 ; CHECK-NEXT: .pad #8
476 ; CHECK-NEXT: sub sp, #8
477 ; CHECK-NEXT: subs.w r12, r2, r0
478 ; CHECK-NEXT: sbcs.w r12, r3, r1
479 ; CHECK-NEXT: csel r0, r0, r2, lo
480 ; CHECK-NEXT: csel r1, r1, r3, lo
481 ; CHECK-NEXT: add sp, #8
483 %c = call <1 x i64> @llvm.umax.v1i64(<1 x i64> %a, <1 x i64> %b)
; Unsigned max of two <2 x i64> vectors (llvm.umax.v2i64).
; Expected assembly: each d-register of the operands is moved into a GPR pair
; and compared with subs/sbcs. The csetm lo results are inserted into r1 with
; bfi (bit offsets #0 and #8), r1 is written to p0 with vmsr, and vpsel picks
; between q0 and q1.
487 declare <2 x i64> @llvm.umax.v2i64(<2 x i64> %a, <2 x i64> %b) readnone
489 define arm_aapcs_vfpcc <2 x i64> @umax2i64(<2 x i64> %a, <2 x i64> %b) {
490 ; CHECK-LABEL: umax2i64:
492 ; CHECK-NEXT: vmov r0, r1, d0
493 ; CHECK-NEXT: vmov r2, r3, d2
494 ; CHECK-NEXT: subs r0, r2, r0
495 ; CHECK-NEXT: sbcs.w r0, r3, r1
496 ; CHECK-NEXT: mov.w r1, #0
497 ; CHECK-NEXT: csetm r0, lo
498 ; CHECK-NEXT: vmov r3, r2, d3
499 ; CHECK-NEXT: bfi r1, r0, #0, #8
500 ; CHECK-NEXT: vmov r0, r12, d1
501 ; CHECK-NEXT: subs r0, r3, r0
502 ; CHECK-NEXT: sbcs.w r0, r2, r12
503 ; CHECK-NEXT: csetm r0, lo
504 ; CHECK-NEXT: bfi r1, r0, #8, #8
505 ; CHECK-NEXT: vmsr p0, r1
506 ; CHECK-NEXT: vpsel q0, q0, q1
508 %c = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %a, <2 x i64> %b)
; Unsigned max of two <4 x i64> vectors (llvm.umax.v4i64); the result is stored to %p.
; Expected assembly: the subs/sbcs + csetm lo + bfi mask-building sequence is
; run twice, first for q1 vs q3 (mask in r3), then for q0 vs q2 (mask in r1).
; Each mask is written to p0 with vmsr and consumed by a vpsel, whose result is
; stored with vstrw.32 at [r0, #16] and [r0] respectively. r4 and lr are saved
; and restored around the body.
512 declare <4 x i64> @llvm.umax.v4i64(<4 x i64> %a, <4 x i64> %b) readnone
514 define arm_aapcs_vfpcc void @umax4i64(<4 x i64> %a, <4 x i64> %b, ptr %p) {
515 ; CHECK-LABEL: umax4i64:
517 ; CHECK-NEXT: .save {r4, lr}
518 ; CHECK-NEXT: push {r4, lr}
519 ; CHECK-NEXT: vmov r1, r12, d2
520 ; CHECK-NEXT: vmov r3, r2, d6
521 ; CHECK-NEXT: subs r1, r3, r1
522 ; CHECK-NEXT: mov.w r3, #0
523 ; CHECK-NEXT: sbcs.w r1, r2, r12
524 ; CHECK-NEXT: vmov lr, r12, d3
525 ; CHECK-NEXT: csetm r2, lo
526 ; CHECK-NEXT: movs r1, #0
527 ; CHECK-NEXT: bfi r3, r2, #0, #8
528 ; CHECK-NEXT: vmov r2, r4, d7
529 ; CHECK-NEXT: subs.w r2, r2, lr
530 ; CHECK-NEXT: sbcs.w r2, r4, r12
531 ; CHECK-NEXT: csetm r2, lo
532 ; CHECK-NEXT: bfi r3, r2, #8, #8
533 ; CHECK-NEXT: vmov r2, r12, d0
534 ; CHECK-NEXT: vmsr p0, r3
535 ; CHECK-NEXT: vmov r4, r3, d4
536 ; CHECK-NEXT: vpsel q1, q1, q3
537 ; CHECK-NEXT: vstrw.32 q1, [r0, #16]
538 ; CHECK-NEXT: subs r2, r4, r2
539 ; CHECK-NEXT: sbcs.w r2, r3, r12
540 ; CHECK-NEXT: vmov r4, r3, d5
541 ; CHECK-NEXT: csetm r2, lo
542 ; CHECK-NEXT: bfi r1, r2, #0, #8
543 ; CHECK-NEXT: vmov r2, r12, d1
544 ; CHECK-NEXT: subs r2, r4, r2
545 ; CHECK-NEXT: sbcs.w r2, r3, r12
546 ; CHECK-NEXT: csetm r2, lo
547 ; CHECK-NEXT: bfi r1, r2, #8, #8
548 ; CHECK-NEXT: vmsr p0, r1
549 ; CHECK-NEXT: vpsel q0, q0, q2
550 ; CHECK-NEXT: vstrw.32 q0, [r0]
551 ; CHECK-NEXT: pop {r4, pc}
552 %c = call <4 x i64> @llvm.umax.v4i64(<4 x i64> %a, <4 x i64> %b)
553 store <4 x i64> %c, ptr %p
; Scalar signed min of two i8 values (llvm.smin.i8).
; Expected assembly: sxtb on both operands, cmp, then csel with lt.
557 declare i8 @llvm.smin.i8(i8 %a, i8 %b) readnone
559 define arm_aapcs_vfpcc i8 @smini8(i8 %a, i8 %b) {
560 ; CHECK-LABEL: smini8:
562 ; CHECK-NEXT: sxtb r1, r1
563 ; CHECK-NEXT: sxtb r0, r0
564 ; CHECK-NEXT: cmp r0, r1
565 ; CHECK-NEXT: csel r0, r0, r1, lt
567 %c = call i8 @llvm.smin.i8(i8 %a, i8 %b)
; Scalar signed min of two i16 values (llvm.smin.i16).
; Expected assembly: sxth on both operands, cmp, then csel with lt.
571 declare i16 @llvm.smin.i16(i16 %a, i16 %b) readnone
573 define arm_aapcs_vfpcc i16 @smini16(i16 %a, i16 %b) {
574 ; CHECK-LABEL: smini16:
576 ; CHECK-NEXT: sxth r1, r1
577 ; CHECK-NEXT: sxth r0, r0
578 ; CHECK-NEXT: cmp r0, r1
579 ; CHECK-NEXT: csel r0, r0, r1, lt
581 %c = call i16 @llvm.smin.i16(i16 %a, i16 %b)
; Scalar signed min of two i32 values (llvm.smin.i32).
; Expected assembly: a plain cmp followed by csel with lt; no extension step.
585 declare i32 @llvm.smin.i32(i32 %a, i32 %b) readnone
587 define arm_aapcs_vfpcc i32 @smini32(i32 %a, i32 %b) {
588 ; CHECK-LABEL: smini32:
590 ; CHECK-NEXT: cmp r0, r1
591 ; CHECK-NEXT: csel r0, r0, r1, lt
593 %c = call i32 @llvm.smin.i32(i32 %a, i32 %b)
; Scalar signed min of two i64 values (llvm.smin.i64).
; Expected assembly: a subs/sbcs pair computing (r1:r0) - (r3:r2) into r12 sets
; the flags, then one csel per 32-bit register of the result, both using lt.
597 declare i64 @llvm.smin.i64(i64 %a, i64 %b) readnone
599 define arm_aapcs_vfpcc i64 @smini64(i64 %a, i64 %b) {
600 ; CHECK-LABEL: smini64:
602 ; CHECK-NEXT: subs.w r12, r0, r2
603 ; CHECK-NEXT: sbcs.w r12, r1, r3
604 ; CHECK-NEXT: csel r0, r0, r2, lt
605 ; CHECK-NEXT: csel r1, r1, r3, lt
607 %c = call i64 @llvm.smin.i64(i64 %a, i64 %b)
; Signed min of two <8 x i8> vectors (llvm.smin.v8i8).
; Expected assembly: both operands go through vmovlb.s8, then a single vmin.s16.
611 declare <8 x i8> @llvm.smin.v8i8(<8 x i8> %a, <8 x i8> %b) readnone
613 define arm_aapcs_vfpcc <8 x i8> @smin8i8(<8 x i8> %a, <8 x i8> %b) {
614 ; CHECK-LABEL: smin8i8:
616 ; CHECK-NEXT: vmovlb.s8 q1, q1
617 ; CHECK-NEXT: vmovlb.s8 q0, q0
618 ; CHECK-NEXT: vmin.s16 q0, q0, q1
620 %c = call <8 x i8> @llvm.smin.v8i8(<8 x i8> %a, <8 x i8> %b)
; Signed min of two <16 x i8> vectors (llvm.smin.v16i8).
; Expected assembly: a single vmin.s8 on q0 and q1.
624 declare <16 x i8> @llvm.smin.v16i8(<16 x i8> %a, <16 x i8> %b) readnone
626 define arm_aapcs_vfpcc <16 x i8> @smin16i8(<16 x i8> %a, <16 x i8> %b) {
627 ; CHECK-LABEL: smin16i8:
629 ; CHECK-NEXT: vmin.s8 q0, q0, q1
631 %c = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %a, <16 x i8> %b)
; Signed min of two <32 x i8> vectors (llvm.smin.v32i8); the result is stored to %p.
; Expected assembly: two vmin.s8 instructions (q1/q3 and q0/q2) followed by two
; vstrw.32 stores at [r0, #16] and [r0].
635 declare <32 x i8> @llvm.smin.v32i8(<32 x i8> %a, <32 x i8> %b) readnone
637 define arm_aapcs_vfpcc void @smin32i8(<32 x i8> %a, <32 x i8> %b, ptr %p) {
638 ; CHECK-LABEL: smin32i8:
640 ; CHECK-NEXT: vmin.s8 q1, q1, q3
641 ; CHECK-NEXT: vmin.s8 q0, q0, q2
642 ; CHECK-NEXT: vstrw.32 q1, [r0, #16]
643 ; CHECK-NEXT: vstrw.32 q0, [r0]
645 %c = call <32 x i8> @llvm.smin.v32i8(<32 x i8> %a, <32 x i8> %b)
646 store <32 x i8> %c, ptr %p
; Signed min of two <4 x i16> vectors (llvm.smin.v4i16).
; Expected assembly: both operands go through vmovlb.s16, then a single vmin.s32.
650 declare <4 x i16> @llvm.smin.v4i16(<4 x i16> %a, <4 x i16> %b) readnone
652 define arm_aapcs_vfpcc <4 x i16> @smin4i16(<4 x i16> %a, <4 x i16> %b) {
653 ; CHECK-LABEL: smin4i16:
655 ; CHECK-NEXT: vmovlb.s16 q1, q1
656 ; CHECK-NEXT: vmovlb.s16 q0, q0
657 ; CHECK-NEXT: vmin.s32 q0, q0, q1
659 %c = call <4 x i16> @llvm.smin.v4i16(<4 x i16> %a, <4 x i16> %b)
; Signed min of two <8 x i16> vectors (llvm.smin.v8i16).
; Expected assembly: a single vmin.s16 on q0 and q1.
663 declare <8 x i16> @llvm.smin.v8i16(<8 x i16> %a, <8 x i16> %b) readnone
665 define arm_aapcs_vfpcc <8 x i16> @smin8i16(<8 x i16> %a, <8 x i16> %b) {
666 ; CHECK-LABEL: smin8i16:
668 ; CHECK-NEXT: vmin.s16 q0, q0, q1
670 %c = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %a, <8 x i16> %b)
; Signed min of two <16 x i16> vectors (llvm.smin.v16i16); the result is stored to %p.
; Expected assembly: two vmin.s16 instructions (q1/q3 and q0/q2) followed by two
; vstrw.32 stores at [r0, #16] and [r0].
674 declare <16 x i16> @llvm.smin.v16i16(<16 x i16> %a, <16 x i16> %b) readnone
676 define arm_aapcs_vfpcc void @smin16i16(<16 x i16> %a, <16 x i16> %b, ptr %p) {
677 ; CHECK-LABEL: smin16i16:
679 ; CHECK-NEXT: vmin.s16 q1, q1, q3
680 ; CHECK-NEXT: vmin.s16 q0, q0, q2
681 ; CHECK-NEXT: vstrw.32 q1, [r0, #16]
682 ; CHECK-NEXT: vstrw.32 q0, [r0]
684 %c = call <16 x i16> @llvm.smin.v16i16(<16 x i16> %a, <16 x i16> %b)
685 store <16 x i16> %c, ptr %p
; Signed min of two <2 x i32> vectors (llvm.smin.v2i32).
; No vmin appears in the expected assembly. Instead, elements are moved to
; general-purpose registers, paired with their sign words (asr #31), and
; compared with subs/sbcs. Each csetm lt result is inserted into r3 with bfi
; (bit offsets #0 and #8), r3 is written to p0 with vmsr, and vpsel picks the
; result. r7 and lr are saved and restored around the body.
689 declare <2 x i32> @llvm.smin.v2i32(<2 x i32> %a, <2 x i32> %b) readnone
691 define arm_aapcs_vfpcc <2 x i32> @smin2i32(<2 x i32> %a, <2 x i32> %b) {
692 ; CHECK-LABEL: smin2i32:
694 ; CHECK-NEXT: .save {r7, lr}
695 ; CHECK-NEXT: push {r7, lr}
696 ; CHECK-NEXT: vmov r0, s6
697 ; CHECK-NEXT: vmov r1, s4
698 ; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
699 ; CHECK-NEXT: asrs r2, r0, #31
700 ; CHECK-NEXT: asrs r3, r1, #31
701 ; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
702 ; CHECK-NEXT: vmov r3, s0
703 ; CHECK-NEXT: vmov r2, s2
704 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
705 ; CHECK-NEXT: asr.w lr, r3, #31
706 ; CHECK-NEXT: subs r3, r3, r1
707 ; CHECK-NEXT: sbcs.w r1, lr, r1, asr #31
708 ; CHECK-NEXT: mov.w r3, #0
709 ; CHECK-NEXT: csetm r1, lt
710 ; CHECK-NEXT: asr.w r12, r2, #31
711 ; CHECK-NEXT: bfi r3, r1, #0, #8
712 ; CHECK-NEXT: subs r1, r2, r0
713 ; CHECK-NEXT: sbcs.w r0, r12, r0, asr #31
714 ; CHECK-NEXT: vmov q0[3], q0[1], lr, r12
715 ; CHECK-NEXT: csetm r0, lt
716 ; CHECK-NEXT: bfi r3, r0, #8, #8
717 ; CHECK-NEXT: vmsr p0, r3
718 ; CHECK-NEXT: vpsel q0, q0, q1
719 ; CHECK-NEXT: pop {r7, pc}
720 %c = call <2 x i32> @llvm.smin.v2i32(<2 x i32> %a, <2 x i32> %b)
; Signed min of two <4 x i32> vectors (llvm.smin.v4i32).
; Expected assembly: a single vmin.s32 on q0 and q1.
724 declare <4 x i32> @llvm.smin.v4i32(<4 x i32> %a, <4 x i32> %b) readnone
726 define arm_aapcs_vfpcc <4 x i32> @smin4i32(<4 x i32> %a, <4 x i32> %b) {
727 ; CHECK-LABEL: smin4i32:
729 ; CHECK-NEXT: vmin.s32 q0, q0, q1
731 %c = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %a, <4 x i32> %b)
; Signed min of two <8 x i32> vectors (llvm.smin.v8i32); the result is stored to %p.
; Expected assembly: two vmin.s32 instructions (q1/q3 and q0/q2) followed by two
; vstrw.32 stores at [r0, #16] and [r0].
735 declare <8 x i32> @llvm.smin.v8i32(<8 x i32> %a, <8 x i32> %b) readnone
737 define arm_aapcs_vfpcc void @smin8i32(<8 x i32> %a, <8 x i32> %b, ptr %p) {
738 ; CHECK-LABEL: smin8i32:
740 ; CHECK-NEXT: vmin.s32 q1, q1, q3
741 ; CHECK-NEXT: vmin.s32 q0, q0, q2
742 ; CHECK-NEXT: vstrw.32 q1, [r0, #16]
743 ; CHECK-NEXT: vstrw.32 q0, [r0]
745 %c = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %a, <8 x i32> %b)
746 store <8 x i32> %c, ptr %p
; Signed min of two <1 x i64> vectors (llvm.smin.v1i64).
; Expected assembly: the subs/sbcs + two csel (lt) sequence operates on r0-r3,
; wrapped in an 8-byte stack adjustment (.pad #8 / sub sp / add sp).
750 declare <1 x i64> @llvm.smin.v1i64(<1 x i64> %a, <1 x i64> %b) readnone
752 define arm_aapcs_vfpcc <1 x i64> @smin1i64(<1 x i64> %a, <1 x i64> %b) {
753 ; CHECK-LABEL: smin1i64:
755 ; CHECK-NEXT: .pad #8
756 ; CHECK-NEXT: sub sp, #8
757 ; CHECK-NEXT: subs.w r12, r0, r2
758 ; CHECK-NEXT: sbcs.w r12, r1, r3
759 ; CHECK-NEXT: csel r0, r0, r2, lt
760 ; CHECK-NEXT: csel r1, r1, r3, lt
761 ; CHECK-NEXT: add sp, #8
763 %c = call <1 x i64> @llvm.smin.v1i64(<1 x i64> %a, <1 x i64> %b)
; Signed min of two <2 x i64> vectors (llvm.smin.v2i64).
; Expected assembly: each d-register of the operands is moved into a GPR pair
; and compared with subs/sbcs. The csetm lt results are inserted into r1 with
; bfi (bit offsets #0 and #8), r1 is written to p0 with vmsr, and vpsel picks
; between q0 and q1.
767 declare <2 x i64> @llvm.smin.v2i64(<2 x i64> %a, <2 x i64> %b) readnone
769 define arm_aapcs_vfpcc <2 x i64> @smin2i64(<2 x i64> %a, <2 x i64> %b) {
770 ; CHECK-LABEL: smin2i64:
772 ; CHECK-NEXT: vmov r0, r1, d2
773 ; CHECK-NEXT: vmov r2, r3, d0
774 ; CHECK-NEXT: subs r0, r2, r0
775 ; CHECK-NEXT: sbcs.w r0, r3, r1
776 ; CHECK-NEXT: mov.w r1, #0
777 ; CHECK-NEXT: csetm r0, lt
778 ; CHECK-NEXT: vmov r3, r2, d1
779 ; CHECK-NEXT: bfi r1, r0, #0, #8
780 ; CHECK-NEXT: vmov r0, r12, d3
781 ; CHECK-NEXT: subs r0, r3, r0
782 ; CHECK-NEXT: sbcs.w r0, r2, r12
783 ; CHECK-NEXT: csetm r0, lt
784 ; CHECK-NEXT: bfi r1, r0, #8, #8
785 ; CHECK-NEXT: vmsr p0, r1
786 ; CHECK-NEXT: vpsel q0, q0, q1
788 %c = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %a, <2 x i64> %b)
; Signed min of two <4 x i64> vectors (llvm.smin.v4i64); the result is stored to %p.
; Expected assembly: the subs/sbcs + csetm lt + bfi mask-building sequence is
; run twice, first for q1 vs q3 (mask in r3), then for q0 vs q2 (mask in r1).
; Each mask is written to p0 with vmsr and consumed by a vpsel, whose result is
; stored with vstrw.32 at [r0, #16] and [r0] respectively. r4 and lr are saved
; and restored around the body.
792 declare <4 x i64> @llvm.smin.v4i64(<4 x i64> %a, <4 x i64> %b) readnone
794 define arm_aapcs_vfpcc void @smin4i64(<4 x i64> %a, <4 x i64> %b, ptr %p) {
795 ; CHECK-LABEL: smin4i64:
797 ; CHECK-NEXT: .save {r4, lr}
798 ; CHECK-NEXT: push {r4, lr}
799 ; CHECK-NEXT: vmov r1, r12, d6
800 ; CHECK-NEXT: vmov r3, r2, d2
801 ; CHECK-NEXT: subs r1, r3, r1
802 ; CHECK-NEXT: mov.w r3, #0
803 ; CHECK-NEXT: sbcs.w r1, r2, r12
804 ; CHECK-NEXT: vmov lr, r12, d7
805 ; CHECK-NEXT: csetm r2, lt
806 ; CHECK-NEXT: movs r1, #0
807 ; CHECK-NEXT: bfi r3, r2, #0, #8
808 ; CHECK-NEXT: vmov r2, r4, d3
809 ; CHECK-NEXT: subs.w r2, r2, lr
810 ; CHECK-NEXT: sbcs.w r2, r4, r12
811 ; CHECK-NEXT: csetm r2, lt
812 ; CHECK-NEXT: bfi r3, r2, #8, #8
813 ; CHECK-NEXT: vmov r2, r12, d4
814 ; CHECK-NEXT: vmsr p0, r3
815 ; CHECK-NEXT: vmov r4, r3, d0
816 ; CHECK-NEXT: vpsel q1, q1, q3
817 ; CHECK-NEXT: vstrw.32 q1, [r0, #16]
818 ; CHECK-NEXT: subs r2, r4, r2
819 ; CHECK-NEXT: sbcs.w r2, r3, r12
820 ; CHECK-NEXT: vmov r4, r3, d1
821 ; CHECK-NEXT: csetm r2, lt
822 ; CHECK-NEXT: bfi r1, r2, #0, #8
823 ; CHECK-NEXT: vmov r2, r12, d5
824 ; CHECK-NEXT: subs r2, r4, r2
825 ; CHECK-NEXT: sbcs.w r2, r3, r12
826 ; CHECK-NEXT: csetm r2, lt
827 ; CHECK-NEXT: bfi r1, r2, #8, #8
828 ; CHECK-NEXT: vmsr p0, r1
829 ; CHECK-NEXT: vpsel q0, q0, q2
830 ; CHECK-NEXT: vstrw.32 q0, [r0]
831 ; CHECK-NEXT: pop {r4, pc}
832 %c = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %a, <4 x i64> %b)
833 store <4 x i64> %c, ptr %p
; Scalar unsigned min of two i8 values (llvm.umin.i8).
; Expected assembly: uxtb on both operands, cmp, then csel with lo.
837 declare i8 @llvm.umin.i8(i8 %a, i8 %b) readnone
839 define arm_aapcs_vfpcc i8 @umini8(i8 %a, i8 %b) {
840 ; CHECK-LABEL: umini8:
842 ; CHECK-NEXT: uxtb r1, r1
843 ; CHECK-NEXT: uxtb r0, r0
844 ; CHECK-NEXT: cmp r0, r1
845 ; CHECK-NEXT: csel r0, r0, r1, lo
847 %c = call i8 @llvm.umin.i8(i8 %a, i8 %b)
; Scalar unsigned min of two i16 values (llvm.umin.i16).
; Expected assembly: uxth on both operands, cmp, then csel with lo.
851 declare i16 @llvm.umin.i16(i16 %a, i16 %b) readnone
853 define arm_aapcs_vfpcc i16 @umini16(i16 %a, i16 %b) {
854 ; CHECK-LABEL: umini16:
856 ; CHECK-NEXT: uxth r1, r1
857 ; CHECK-NEXT: uxth r0, r0
858 ; CHECK-NEXT: cmp r0, r1
859 ; CHECK-NEXT: csel r0, r0, r1, lo
861 %c = call i16 @llvm.umin.i16(i16 %a, i16 %b)
; Scalar unsigned min of two i32 values (llvm.umin.i32).
; Expected assembly: a plain cmp followed by csel with lo; no extension step.
865 declare i32 @llvm.umin.i32(i32 %a, i32 %b) readnone
867 define arm_aapcs_vfpcc i32 @umini32(i32 %a, i32 %b) {
868 ; CHECK-LABEL: umini32:
870 ; CHECK-NEXT: cmp r0, r1
871 ; CHECK-NEXT: csel r0, r0, r1, lo
873 %c = call i32 @llvm.umin.i32(i32 %a, i32 %b)
; Scalar unsigned min of two i64 values (llvm.umin.i64).
; Expected assembly: a subs/sbcs pair computing (r1:r0) - (r3:r2) into r12 sets
; the flags, then one csel per 32-bit register of the result, both using lo.
877 declare i64 @llvm.umin.i64(i64 %a, i64 %b) readnone
879 define arm_aapcs_vfpcc i64 @umini64(i64 %a, i64 %b) {
880 ; CHECK-LABEL: umini64:
882 ; CHECK-NEXT: subs.w r12, r0, r2
883 ; CHECK-NEXT: sbcs.w r12, r1, r3
884 ; CHECK-NEXT: csel r0, r0, r2, lo
885 ; CHECK-NEXT: csel r1, r1, r3, lo
887 %c = call i64 @llvm.umin.i64(i64 %a, i64 %b)
; Unsigned min of two <8 x i8> vectors (llvm.umin.v8i8).
; Expected assembly: both operands go through vmovlb.u8, then a single vmin.u16.
891 declare <8 x i8> @llvm.umin.v8i8(<8 x i8> %a, <8 x i8> %b) readnone
893 define arm_aapcs_vfpcc <8 x i8> @umin8i8(<8 x i8> %a, <8 x i8> %b) {
894 ; CHECK-LABEL: umin8i8:
896 ; CHECK-NEXT: vmovlb.u8 q1, q1
897 ; CHECK-NEXT: vmovlb.u8 q0, q0
898 ; CHECK-NEXT: vmin.u16 q0, q0, q1
900 %c = call <8 x i8> @llvm.umin.v8i8(<8 x i8> %a, <8 x i8> %b)
; Unsigned min of two <16 x i8> vectors (llvm.umin.v16i8).
; Expected assembly: a single vmin.u8 on q0 and q1.
904 declare <16 x i8> @llvm.umin.v16i8(<16 x i8> %a, <16 x i8> %b) readnone
906 define arm_aapcs_vfpcc <16 x i8> @umin16i8(<16 x i8> %a, <16 x i8> %b) {
907 ; CHECK-LABEL: umin16i8:
909 ; CHECK-NEXT: vmin.u8 q0, q0, q1
911 %c = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %a, <16 x i8> %b)
; Unsigned min of two <32 x i8> vectors (llvm.umin.v32i8); the result is stored to %p.
; Expected assembly: two vmin.u8 instructions (q1/q3 and q0/q2) followed by two
; vstrw.32 stores at [r0, #16] and [r0].
915 declare <32 x i8> @llvm.umin.v32i8(<32 x i8> %a, <32 x i8> %b) readnone
917 define arm_aapcs_vfpcc void @umin32i8(<32 x i8> %a, <32 x i8> %b, ptr %p) {
918 ; CHECK-LABEL: umin32i8:
920 ; CHECK-NEXT: vmin.u8 q1, q1, q3
921 ; CHECK-NEXT: vmin.u8 q0, q0, q2
922 ; CHECK-NEXT: vstrw.32 q1, [r0, #16]
923 ; CHECK-NEXT: vstrw.32 q0, [r0]
925 %c = call <32 x i8> @llvm.umin.v32i8(<32 x i8> %a, <32 x i8> %b)
926 store <32 x i8> %c, ptr %p
; Unsigned min of two <4 x i16> vectors (llvm.umin.v4i16).
; Expected assembly: both operands go through vmovlb.u16, then a single vmin.u32.
930 declare <4 x i16> @llvm.umin.v4i16(<4 x i16> %a, <4 x i16> %b) readnone
932 define arm_aapcs_vfpcc <4 x i16> @umin4i16(<4 x i16> %a, <4 x i16> %b) {
933 ; CHECK-LABEL: umin4i16:
935 ; CHECK-NEXT: vmovlb.u16 q1, q1
936 ; CHECK-NEXT: vmovlb.u16 q0, q0
937 ; CHECK-NEXT: vmin.u32 q0, q0, q1
939 %c = call <4 x i16> @llvm.umin.v4i16(<4 x i16> %a, <4 x i16> %b)
; Unsigned min of two <8 x i16> vectors (llvm.umin.v8i16).
; Expected assembly: a single vmin.u16 on q0 and q1.
943 declare <8 x i16> @llvm.umin.v8i16(<8 x i16> %a, <8 x i16> %b) readnone
945 define arm_aapcs_vfpcc <8 x i16> @umin8i16(<8 x i16> %a, <8 x i16> %b) {
946 ; CHECK-LABEL: umin8i16:
948 ; CHECK-NEXT: vmin.u16 q0, q0, q1
950 %c = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %a, <8 x i16> %b)
; Unsigned min of two <16 x i16> vectors (llvm.umin.v16i16); the result is stored to %p.
; Expected assembly: two vmin.u16 instructions (q1/q3 and q0/q2) followed by two
; vstrw.32 stores at [r0, #16] and [r0].
954 declare <16 x i16> @llvm.umin.v16i16(<16 x i16> %a, <16 x i16> %b) readnone
956 define arm_aapcs_vfpcc void @umin16i16(<16 x i16> %a, <16 x i16> %b, ptr %p) {
957 ; CHECK-LABEL: umin16i16:
959 ; CHECK-NEXT: vmin.u16 q1, q1, q3
960 ; CHECK-NEXT: vmin.u16 q0, q0, q2
961 ; CHECK-NEXT: vstrw.32 q1, [r0, #16]
962 ; CHECK-NEXT: vstrw.32 q0, [r0]
964 %c = call <16 x i16> @llvm.umin.v16i16(<16 x i16> %a, <16 x i16> %b)
965 store <16 x i16> %c, ptr %p
; Unsigned min of two <2 x i32> vectors (llvm.umin.v2i32).
; No vmin appears in the expected assembly. Both operands are first ANDed with
; a 0xffffffff-per-64-bit mask (vmov.i64 + vand), then each d-register is moved
; to a GPR pair and compared with subs/sbcs. The csetm lo results are inserted
; into r1 with bfi (bit offsets #0 and #8), written to p0 with vmsr, and
; consumed by vpsel.
969 declare <2 x i32> @llvm.umin.v2i32(<2 x i32> %a, <2 x i32> %b) readnone
971 define arm_aapcs_vfpcc <2 x i32> @umin2i32(<2 x i32> %a, <2 x i32> %b) {
972 ; CHECK-LABEL: umin2i32:
974 ; CHECK-NEXT: vmov.i64 q2, #0xffffffff
975 ; CHECK-NEXT: vand q1, q1, q2
976 ; CHECK-NEXT: vand q0, q0, q2
977 ; CHECK-NEXT: vmov r0, r1, d2
978 ; CHECK-NEXT: vmov r2, r3, d0
979 ; CHECK-NEXT: subs r0, r2, r0
980 ; CHECK-NEXT: sbcs.w r0, r3, r1
981 ; CHECK-NEXT: mov.w r1, #0
982 ; CHECK-NEXT: csetm r0, lo
983 ; CHECK-NEXT: vmov r3, r2, d1
984 ; CHECK-NEXT: bfi r1, r0, #0, #8
985 ; CHECK-NEXT: vmov r0, r12, d3
986 ; CHECK-NEXT: subs r0, r3, r0
987 ; CHECK-NEXT: sbcs.w r0, r2, r12
988 ; CHECK-NEXT: csetm r0, lo
989 ; CHECK-NEXT: bfi r1, r0, #8, #8
990 ; CHECK-NEXT: vmsr p0, r1
991 ; CHECK-NEXT: vpsel q0, q0, q1
993 %c = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %a, <2 x i32> %b)
; Unsigned min of two <4 x i32> vectors (llvm.umin.v4i32).
; Expected assembly: a single vmin.u32 on q0 and q1.
997 declare <4 x i32> @llvm.umin.v4i32(<4 x i32> %a, <4 x i32> %b) readnone
999 define arm_aapcs_vfpcc <4 x i32> @umin4i32(<4 x i32> %a, <4 x i32> %b) {
1000 ; CHECK-LABEL: umin4i32:
1002 ; CHECK-NEXT: vmin.u32 q0, q0, q1
1004 %c = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %a, <4 x i32> %b)
; Unsigned min of two <8 x i32> vectors (llvm.umin.v8i32); the result is stored to %p.
; Expected assembly: two vmin.u32 instructions (q1/q3 and q0/q2) followed by two
; vstrw.32 stores at [r0, #16] and [r0].
1008 declare <8 x i32> @llvm.umin.v8i32(<8 x i32> %a, <8 x i32> %b) readnone
1010 define arm_aapcs_vfpcc void @umin8i32(<8 x i32> %a, <8 x i32> %b, ptr %p) {
1011 ; CHECK-LABEL: umin8i32:
1013 ; CHECK-NEXT: vmin.u32 q1, q1, q3
1014 ; CHECK-NEXT: vmin.u32 q0, q0, q2
1015 ; CHECK-NEXT: vstrw.32 q1, [r0, #16]
1016 ; CHECK-NEXT: vstrw.32 q0, [r0]
1018 %c = call <8 x i32> @llvm.umin.v8i32(<8 x i32> %a, <8 x i32> %b)
1019 store <8 x i32> %c, ptr %p
; Unsigned min of two <1 x i64> vectors (llvm.umin.v1i64).
; Expected assembly: the subs/sbcs + two csel (lo) sequence operates on r0-r3,
; wrapped in an 8-byte stack adjustment (.pad #8 / sub sp / add sp).
1023 declare <1 x i64> @llvm.umin.v1i64(<1 x i64> %a, <1 x i64> %b) readnone
1025 define arm_aapcs_vfpcc <1 x i64> @umin1i64(<1 x i64> %a, <1 x i64> %b) {
1026 ; CHECK-LABEL: umin1i64:
1028 ; CHECK-NEXT: .pad #8
1029 ; CHECK-NEXT: sub sp, #8
1030 ; CHECK-NEXT: subs.w r12, r0, r2
1031 ; CHECK-NEXT: sbcs.w r12, r1, r3
1032 ; CHECK-NEXT: csel r0, r0, r2, lo
1033 ; CHECK-NEXT: csel r1, r1, r3, lo
1034 ; CHECK-NEXT: add sp, #8
1036 %c = call <1 x i64> @llvm.umin.v1i64(<1 x i64> %a, <1 x i64> %b)
; Unsigned min of two <2 x i64> vectors (llvm.umin.v2i64).
; Expected assembly: each d-register of the operands is moved into a GPR pair
; and compared with subs/sbcs. The csetm lo results are inserted into r1 with
; bfi (bit offsets #0 and #8), r1 is written to p0 with vmsr, and vpsel picks
; between q0 and q1.
1040 declare <2 x i64> @llvm.umin.v2i64(<2 x i64> %a, <2 x i64> %b) readnone
1042 define arm_aapcs_vfpcc <2 x i64> @umin2i64(<2 x i64> %a, <2 x i64> %b) {
1043 ; CHECK-LABEL: umin2i64:
1045 ; CHECK-NEXT: vmov r0, r1, d2
1046 ; CHECK-NEXT: vmov r2, r3, d0
1047 ; CHECK-NEXT: subs r0, r2, r0
1048 ; CHECK-NEXT: sbcs.w r0, r3, r1
1049 ; CHECK-NEXT: mov.w r1, #0
1050 ; CHECK-NEXT: csetm r0, lo
1051 ; CHECK-NEXT: vmov r3, r2, d1
1052 ; CHECK-NEXT: bfi r1, r0, #0, #8
1053 ; CHECK-NEXT: vmov r0, r12, d3
1054 ; CHECK-NEXT: subs r0, r3, r0
1055 ; CHECK-NEXT: sbcs.w r0, r2, r12
1056 ; CHECK-NEXT: csetm r0, lo
1057 ; CHECK-NEXT: bfi r1, r0, #8, #8
1058 ; CHECK-NEXT: vmsr p0, r1
1059 ; CHECK-NEXT: vpsel q0, q0, q1
1061 %c = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %a, <2 x i64> %b)
; Unsigned min of two <4 x i64> vectors (llvm.umin.v4i64); the result is stored to %p.
; Expected assembly: the subs/sbcs + csetm lo + bfi mask-building sequence is
; run twice, first for q1 vs q3 (mask in r3), then for q0 vs q2 (mask in r1).
; Each mask is written to p0 with vmsr and consumed by a vpsel, whose result is
; stored with vstrw.32 at [r0, #16] and [r0] respectively. r4 and lr are saved
; and restored around the body.
1065 declare <4 x i64> @llvm.umin.v4i64(<4 x i64> %a, <4 x i64> %b) readnone
1067 define arm_aapcs_vfpcc void @umin4i64(<4 x i64> %a, <4 x i64> %b, ptr %p) {
1068 ; CHECK-LABEL: umin4i64:
1070 ; CHECK-NEXT: .save {r4, lr}
1071 ; CHECK-NEXT: push {r4, lr}
1072 ; CHECK-NEXT: vmov r1, r12, d6
1073 ; CHECK-NEXT: vmov r3, r2, d2
1074 ; CHECK-NEXT: subs r1, r3, r1
1075 ; CHECK-NEXT: mov.w r3, #0
1076 ; CHECK-NEXT: sbcs.w r1, r2, r12
1077 ; CHECK-NEXT: vmov lr, r12, d7
1078 ; CHECK-NEXT: csetm r2, lo
1079 ; CHECK-NEXT: movs r1, #0
1080 ; CHECK-NEXT: bfi r3, r2, #0, #8
1081 ; CHECK-NEXT: vmov r2, r4, d3
1082 ; CHECK-NEXT: subs.w r2, r2, lr
1083 ; CHECK-NEXT: sbcs.w r2, r4, r12
1084 ; CHECK-NEXT: csetm r2, lo
1085 ; CHECK-NEXT: bfi r3, r2, #8, #8
1086 ; CHECK-NEXT: vmov r2, r12, d4
1087 ; CHECK-NEXT: vmsr p0, r3
1088 ; CHECK-NEXT: vmov r4, r3, d0
1089 ; CHECK-NEXT: vpsel q1, q1, q3
1090 ; CHECK-NEXT: vstrw.32 q1, [r0, #16]
1091 ; CHECK-NEXT: subs r2, r4, r2
1092 ; CHECK-NEXT: sbcs.w r2, r3, r12
1093 ; CHECK-NEXT: vmov r4, r3, d1
1094 ; CHECK-NEXT: csetm r2, lo
1095 ; CHECK-NEXT: bfi r1, r2, #0, #8
1096 ; CHECK-NEXT: vmov r2, r12, d5
1097 ; CHECK-NEXT: subs r2, r4, r2
1098 ; CHECK-NEXT: sbcs.w r2, r3, r12
1099 ; CHECK-NEXT: csetm r2, lo
1100 ; CHECK-NEXT: bfi r1, r2, #8, #8
1101 ; CHECK-NEXT: vmsr p0, r1
1102 ; CHECK-NEXT: vpsel q0, q0, q2
1103 ; CHECK-NEXT: vstrw.32 q0, [r0]
1104 ; CHECK-NEXT: pop {r4, pc}
1105 %c = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %a, <4 x i64> %b)
1106 store <4 x i64> %c, ptr %p