; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=armv7a-eabihf -mattr=+neon %s -o - | FileCheck %s
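; These tests check that (trunc (abs (sub (ext a), (ext b)))) and related
; patterns are selected to the NEON absolute-difference instructions
; vabd.s/vabd.u and their widening forms vabdl.s/vabdl.u.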

;
; SABD
;

define <8 x i8> @sabd_8b(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: sabd_8b:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.s8 d0, d0, d1
; CHECK-NEXT:    bx lr
  %a.sext = sext <8 x i8> %a to <8 x i16>
  %b.sext = sext <8 x i8> %b to <8 x i16>
  %sub = sub <8 x i16> %a.sext, %b.sext
  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
  %trunc = trunc <8 x i16> %abs to <8 x i8>
  ret <8 x i8> %trunc
}

define <16 x i8> @sabd_16b(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: sabd_16b:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
  %a.sext = sext <16 x i8> %a to <16 x i16>
  %b.sext = sext <16 x i8> %b to <16 x i16>
  %sub = sub <16 x i16> %a.sext, %b.sext
  %abs = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %sub, i1 true)
  %trunc = trunc <16 x i16> %abs to <16 x i8>
  ret <16 x i8> %trunc
}

define <4 x i16> @sabd_4h(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: sabd_4h:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.s16 d0, d0, d1
; CHECK-NEXT:    bx lr
  %a.sext = sext <4 x i16> %a to <4 x i32>
  %b.sext = sext <4 x i16> %b to <4 x i32>
  %sub = sub <4 x i32> %a.sext, %b.sext
  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
  %trunc = trunc <4 x i32> %abs to <4 x i16>
  ret <4 x i16> %trunc
}
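
; The *_promoted_ops variants start from operands narrower than any legal NEON
; element type, so the sign-extension is materialized first (vshl/vshr pairs)
; unless the whole pattern folds into a widening vabdl.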

define <4 x i16> @sabd_4h_promoted_ops(<4 x i8> %a, <4 x i8> %b) {
; CHECK-LABEL: sabd_4h_promoted_ops:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vshl.i16 d16, d1, #8
; CHECK-NEXT:    vshl.i16 d17, d0, #8
; CHECK-NEXT:    vshr.s16 d16, d16, #8
; CHECK-NEXT:    vshr.s16 d17, d17, #8
; CHECK-NEXT:    vabd.s16 d0, d17, d16
; CHECK-NEXT:    bx lr
  %a.sext = sext <4 x i8> %a to <4 x i16>
  %b.sext = sext <4 x i8> %b to <4 x i16>
  %sub = sub <4 x i16> %a.sext, %b.sext
  %abs = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %sub, i1 true)
  ret <4 x i16> %abs
}

define <8 x i16> @sabd_8h(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: sabd_8h:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
  %a.sext = sext <8 x i16> %a to <8 x i32>
  %b.sext = sext <8 x i16> %b to <8 x i32>
  %sub = sub <8 x i32> %a.sext, %b.sext
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 true)
  %trunc = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %trunc
}

define <8 x i16> @sabd_8h_promoted_ops(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: sabd_8h_promoted_ops:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabdl.s8 q0, d0, d1
; CHECK-NEXT:    bx lr
  %a.sext = sext <8 x i8> %a to <8 x i16>
  %b.sext = sext <8 x i8> %b to <8 x i16>
  %sub = sub <8 x i16> %a.sext, %b.sext
  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
  ret <8 x i16> %abs
}

define <2 x i32> @sabd_2s(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: sabd_2s:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.s32 d0, d0, d1
; CHECK-NEXT:    bx lr
  %a.sext = sext <2 x i32> %a to <2 x i64>
  %b.sext = sext <2 x i32> %b to <2 x i64>
  %sub = sub <2 x i64> %a.sext, %b.sext
  %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
  %trunc = trunc <2 x i64> %abs to <2 x i32>
  ret <2 x i32> %trunc
}

define <2 x i32> @sabd_2s_promoted_ops(<2 x i16> %a, <2 x i16> %b) {
; CHECK-LABEL: sabd_2s_promoted_ops:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vshl.i32 d16, d1, #16
; CHECK-NEXT:    vshl.i32 d17, d0, #16
; CHECK-NEXT:    vshr.s32 d16, d16, #16
; CHECK-NEXT:    vshr.s32 d17, d17, #16
; CHECK-NEXT:    vabd.s32 d0, d17, d16
; CHECK-NEXT:    bx lr
  %a.sext = sext <2 x i16> %a to <2 x i32>
  %b.sext = sext <2 x i16> %b to <2 x i32>
  %sub = sub <2 x i32> %a.sext, %b.sext
  %abs = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %sub, i1 true)
  ret <2 x i32> %abs
}

define <4 x i32> @sabd_4s(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: sabd_4s:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
  %a.sext = sext <4 x i32> %a to <4 x i64>
  %b.sext = sext <4 x i32> %b to <4 x i64>
  %sub = sub <4 x i64> %a.sext, %b.sext
  %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %sub, i1 true)
  %trunc = trunc <4 x i64> %abs to <4 x i32>
  ret <4 x i32> %trunc
}

define <4 x i32> @sabd_4s_promoted_ops(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: sabd_4s_promoted_ops:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabdl.s16 q0, d0, d1
; CHECK-NEXT:    bx lr
  %a.sext = sext <4 x i16> %a to <4 x i32>
  %b.sext = sext <4 x i16> %b to <4 x i32>
  %sub = sub <4 x i32> %a.sext, %b.sext
  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
  ret <4 x i32> %abs
}
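
; NEON vabd has no 64-bit element form, so the v2i64 case is expanded to
; scalar subtract-with-carry and abs sequences on the core registers.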

define <2 x i64> @sabd_2d(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: sabd_2d:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, r5, r11, lr}
; CHECK-NEXT:    push {r4, r5, r11, lr}
; CHECK-NEXT:    vmov lr, r1, d1
; CHECK-NEXT:    vmov r2, r3, d3
; CHECK-NEXT:    vmov r12, r0, d0
; CHECK-NEXT:    subs lr, lr, r2
; CHECK-NEXT:    asr r4, r1, #31
; CHECK-NEXT:    sbcs r1, r1, r3
; CHECK-NEXT:    sbcs r2, r4, r3, asr #31
; CHECK-NEXT:    vmov r2, r5, d2
; CHECK-NEXT:    sbc r3, r4, r3, asr #31
; CHECK-NEXT:    eor r4, lr, r3, asr #31
; CHECK-NEXT:    eor r1, r1, r3, asr #31
; CHECK-NEXT:    subs r4, r4, r3, asr #31
; CHECK-NEXT:    sbc lr, r1, r3, asr #31
; CHECK-NEXT:    asr r3, r0, #31
; CHECK-NEXT:    vmov.32 d1[0], r4
; CHECK-NEXT:    subs r2, r12, r2
; CHECK-NEXT:    sbcs r0, r0, r5
; CHECK-NEXT:    sbcs r1, r3, r5, asr #31
; CHECK-NEXT:    sbc r1, r3, r5, asr #31
; CHECK-NEXT:    eor r2, r2, r1, asr #31
; CHECK-NEXT:    eor r0, r0, r1, asr #31
; CHECK-NEXT:    subs r2, r2, r1, asr #31
; CHECK-NEXT:    sbc r0, r0, r1, asr #31
; CHECK-NEXT:    vmov.32 d0[0], r2
; CHECK-NEXT:    vmov.32 d1[1], lr
; CHECK-NEXT:    vmov.32 d0[1], r0
; CHECK-NEXT:    pop {r4, r5, r11, pc}
  %a.sext = sext <2 x i64> %a to <2 x i128>
  %b.sext = sext <2 x i64> %b to <2 x i128>
  %sub = sub <2 x i128> %a.sext, %b.sext
  %abs = call <2 x i128> @llvm.abs.v2i128(<2 x i128> %sub, i1 true)
  %trunc = trunc <2 x i128> %abs to <2 x i64>
  ret <2 x i64> %trunc
}

define <2 x i64> @sabd_2d_promoted_ops(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: sabd_2d_promoted_ops:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabdl.s32 q0, d0, d1
; CHECK-NEXT:    bx lr
  %a.sext = sext <2 x i32> %a to <2 x i64>
  %b.sext = sext <2 x i32> %b to <2 x i64>
  %sub = sub <2 x i64> %a.sext, %b.sext
  %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
  ret <2 x i64> %abs
}

;
; UABD
;
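
; The same patterns with zero-extended operands select the unsigned forms
; vabd.u and vabdl.u; narrow operands are zero-extended by masking instead
; of by shift pairs.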

define <8 x i8> @uabd_8b(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: uabd_8b:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.u8 d0, d0, d1
; CHECK-NEXT:    bx lr
  %a.zext = zext <8 x i8> %a to <8 x i16>
  %b.zext = zext <8 x i8> %b to <8 x i16>
  %sub = sub <8 x i16> %a.zext, %b.zext
  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
  %trunc = trunc <8 x i16> %abs to <8 x i8>
  ret <8 x i8> %trunc
}

define <16 x i8> @uabd_16b(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: uabd_16b:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
  %a.zext = zext <16 x i8> %a to <16 x i16>
  %b.zext = zext <16 x i8> %b to <16 x i16>
  %sub = sub <16 x i16> %a.zext, %b.zext
  %abs = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %sub, i1 true)
  %trunc = trunc <16 x i16> %abs to <16 x i8>
  ret <16 x i8> %trunc
}

define <4 x i16> @uabd_4h(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: uabd_4h:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.u16 d0, d0, d1
; CHECK-NEXT:    bx lr
  %a.zext = zext <4 x i16> %a to <4 x i32>
  %b.zext = zext <4 x i16> %b to <4 x i32>
  %sub = sub <4 x i32> %a.zext, %b.zext
  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
  %trunc = trunc <4 x i32> %abs to <4 x i16>
  ret <4 x i16> %trunc
}

define <4 x i16> @uabd_4h_promoted_ops(<4 x i8> %a, <4 x i8> %b) {
; CHECK-LABEL: uabd_4h_promoted_ops:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vbic.i16 d1, #0xff00
; CHECK-NEXT:    vbic.i16 d0, #0xff00
; CHECK-NEXT:    vabd.u16 d0, d0, d1
; CHECK-NEXT:    bx lr
  %a.zext = zext <4 x i8> %a to <4 x i16>
  %b.zext = zext <4 x i8> %b to <4 x i16>
  %sub = sub <4 x i16> %a.zext, %b.zext
  %abs = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %sub, i1 true)
  ret <4 x i16> %abs
}

define <8 x i16> @uabd_8h(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: uabd_8h:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
  %a.zext = zext <8 x i16> %a to <8 x i32>
  %b.zext = zext <8 x i16> %b to <8 x i32>
  %sub = sub <8 x i32> %a.zext, %b.zext
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 true)
  %trunc = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %trunc
}

define <8 x i16> @uabd_8h_promoted_ops(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: uabd_8h_promoted_ops:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabdl.u8 q0, d0, d1
; CHECK-NEXT:    bx lr
  %a.zext = zext <8 x i8> %a to <8 x i16>
  %b.zext = zext <8 x i8> %b to <8 x i16>
  %sub = sub <8 x i16> %a.zext, %b.zext
  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
  ret <8 x i16> %abs
}

define <2 x i32> @uabd_2s(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: uabd_2s:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.u32 d0, d0, d1
; CHECK-NEXT:    bx lr
  %a.zext = zext <2 x i32> %a to <2 x i64>
  %b.zext = zext <2 x i32> %b to <2 x i64>
  %sub = sub <2 x i64> %a.zext, %b.zext
  %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
  %trunc = trunc <2 x i64> %abs to <2 x i32>
  ret <2 x i32> %trunc
}

define <2 x i32> @uabd_2s_promoted_ops(<2 x i16> %a, <2 x i16> %b) {
; CHECK-LABEL: uabd_2s_promoted_ops:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i32 d16, #0xffff
; CHECK-NEXT:    vand d17, d1, d16
; CHECK-NEXT:    vand d16, d0, d16
; CHECK-NEXT:    vabd.u32 d0, d16, d17
; CHECK-NEXT:    bx lr
  %a.zext = zext <2 x i16> %a to <2 x i32>
  %b.zext = zext <2 x i16> %b to <2 x i32>
  %sub = sub <2 x i32> %a.zext, %b.zext
  %abs = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %sub, i1 true)
  ret <2 x i32> %abs
}

define <4 x i32> @uabd_4s(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: uabd_4s:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
  %a.zext = zext <4 x i32> %a to <4 x i64>
  %b.zext = zext <4 x i32> %b to <4 x i64>
  %sub = sub <4 x i64> %a.zext, %b.zext
  %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %sub, i1 true)
  %trunc = trunc <4 x i64> %abs to <4 x i32>
  ret <4 x i32> %trunc
}

define <4 x i32> @uabd_4s_promoted_ops(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: uabd_4s_promoted_ops:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabdl.u16 q0, d0, d1
; CHECK-NEXT:    bx lr
  %a.zext = zext <4 x i16> %a to <4 x i32>
  %b.zext = zext <4 x i16> %b to <4 x i32>
  %sub = sub <4 x i32> %a.zext, %b.zext
  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
  ret <4 x i32> %abs
}

define <2 x i64> @uabd_2d(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: uabd_2d:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, r5, r6, lr}
; CHECK-NEXT:    push {r4, r5, r6, lr}
; CHECK-NEXT:    vmov r0, r12, d3
; CHECK-NEXT:    mov r1, #0
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    vmov lr, r6, d2
; CHECK-NEXT:    vmov r4, r5, d0
; CHECK-NEXT:    subs r0, r2, r0
; CHECK-NEXT:    sbcs r2, r3, r12
; CHECK-NEXT:    sbcs r3, r1, #0
; CHECK-NEXT:    sbc r3, r1, #0
; CHECK-NEXT:    eor r0, r0, r3, asr #31
; CHECK-NEXT:    eor r2, r2, r3, asr #31
; CHECK-NEXT:    subs r0, r0, r3, asr #31
; CHECK-NEXT:    sbc r2, r2, r3, asr #31
; CHECK-NEXT:    subs r3, r4, lr
; CHECK-NEXT:    sbcs r6, r5, r6
; CHECK-NEXT:    vmov.32 d1[0], r0
; CHECK-NEXT:    sbcs r5, r1, #0
; CHECK-NEXT:    sbc r1, r1, #0
; CHECK-NEXT:    eor r3, r3, r1, asr #31
; CHECK-NEXT:    subs r0, r3, r1, asr #31
; CHECK-NEXT:    vmov.32 d0[0], r0
; CHECK-NEXT:    eor r0, r6, r1, asr #31
; CHECK-NEXT:    sbc r0, r0, r1, asr #31
; CHECK-NEXT:    vmov.32 d1[1], r2
; CHECK-NEXT:    vmov.32 d0[1], r0
; CHECK-NEXT:    pop {r4, r5, r6, pc}
  %a.zext = zext <2 x i64> %a to <2 x i128>
  %b.zext = zext <2 x i64> %b to <2 x i128>
  %sub = sub <2 x i128> %a.zext, %b.zext
  %abs = call <2 x i128> @llvm.abs.v2i128(<2 x i128> %sub, i1 true)
  %trunc = trunc <2 x i128> %abs to <2 x i64>
  ret <2 x i64> %trunc
}

define <2 x i64> @uabd_2d_promoted_ops(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: uabd_2d_promoted_ops:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabdl.u32 q0, d0, d1
; CHECK-NEXT:    bx lr
  %a.zext = zext <2 x i32> %a to <2 x i64>
  %b.zext = zext <2 x i32> %b to <2 x i64>
  %sub = sub <2 x i64> %a.zext, %b.zext
  %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
  ret <2 x i64> %abs
}
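
; abs(sub nuw x, y) is not an unsigned absolute difference: for i8, x = 255
; and y = 0 give sub = -1 and abs = 1, not 255. So no vabd may be formed for
; the tests below; a plain vsub + vabs (or the i64 abs expansion) is expected.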

define <16 x i8> @uabd_v16i8_nuw(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: uabd_v16i8_nuw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.i8 q8, q0, q1
; CHECK-NEXT:    vabs.s8 q0, q8
; CHECK-NEXT:    bx lr
  %sub = sub nuw <16 x i8> %a, %b
  %abs = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %sub, i1 true)
  ret <16 x i8> %abs
}

define <8 x i16> @uabd_v8i16_nuw(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: uabd_v8i16_nuw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.i16 q8, q0, q1
; CHECK-NEXT:    vabs.s16 q0, q8
; CHECK-NEXT:    bx lr
  %sub = sub nuw <8 x i16> %a, %b
  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
  ret <8 x i16> %abs
}

define <4 x i32> @uabd_v4i32_nuw(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: uabd_v4i32_nuw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.i32 q8, q0, q1
; CHECK-NEXT:    vabs.s32 q0, q8
; CHECK-NEXT:    bx lr
  %sub = sub nuw <4 x i32> %a, %b
  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
  ret <4 x i32> %abs
}

define <2 x i64> @uabd_v2i64_nuw(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: uabd_v2i64_nuw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.i64 q8, q0, q1
; CHECK-NEXT:    vshr.s64 q9, q8, #63
; CHECK-NEXT:    veor q8, q8, q9
; CHECK-NEXT:    vsub.i64 q0, q8, q9
; CHECK-NEXT:    bx lr
  %sub = sub nuw <2 x i64> %a, %b
  %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
  ret <2 x i64> %abs
}
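
; With nsw the signed difference is exact, so abs(sub nsw a, b) is precisely
; sabd(a, b) and vabd.s can be selected directly. v2i64 still expands because
; vabd has no 64-bit element form.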

define <16 x i8> @sabd_v16i8_nsw(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: sabd_v16i8_nsw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
  %sub = sub nsw <16 x i8> %a, %b
  %abs = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %sub, i1 true)
  ret <16 x i8> %abs
}

define <8 x i16> @sabd_v8i16_nsw(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: sabd_v8i16_nsw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
  %sub = sub nsw <8 x i16> %a, %b
  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
  ret <8 x i16> %abs
}

define <4 x i32> @sabd_v4i32_nsw(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: sabd_v4i32_nsw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
  %sub = sub nsw <4 x i32> %a, %b
  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
  ret <4 x i32> %abs
}

define <2 x i64> @sabd_v2i64_nsw(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: sabd_v2i64_nsw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.i64 q8, q0, q1
; CHECK-NEXT:    vshr.s64 q9, q8, #63
; CHECK-NEXT:    veor q8, q8, q9
; CHECK-NEXT:    vsub.i64 q0, q8, q9
; CHECK-NEXT:    bx lr
  %sub = sub nsw <2 x i64> %a, %b
  %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
  ret <2 x i64> %abs
}
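
; sub(smax(a, b), smin(a, b)) computes |a - b| for signed operands, so it is
; recognized as a signed absolute difference.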

define <16 x i8> @smaxmin_v16i8(<16 x i8> %0, <16 x i8> %1) {
; CHECK-LABEL: smaxmin_v16i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
  %a = tail call <16 x i8> @llvm.smax.v16i8(<16 x i8> %0, <16 x i8> %1)
  %b = tail call <16 x i8> @llvm.smin.v16i8(<16 x i8> %0, <16 x i8> %1)
  %sub = sub <16 x i8> %a, %b
  ret <16 x i8> %sub
}

define <8 x i16> @smaxmin_v8i16(<8 x i16> %0, <8 x i16> %1) {
; CHECK-LABEL: smaxmin_v8i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
  %a = tail call <8 x i16> @llvm.smax.v8i16(<8 x i16> %0, <8 x i16> %1)
  %b = tail call <8 x i16> @llvm.smin.v8i16(<8 x i16> %0, <8 x i16> %1)
  %sub = sub <8 x i16> %a, %b
  ret <8 x i16> %sub
}

define <4 x i32> @smaxmin_v4i32(<4 x i32> %0, <4 x i32> %1) {
; CHECK-LABEL: smaxmin_v4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
  %a = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %0, <4 x i32> %1)
  %b = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %0, <4 x i32> %1)
  %sub = sub <4 x i32> %a, %b
  ret <4 x i32> %sub
}

define <2 x i64> @smaxmin_v2i64(<2 x i64> %0, <2 x i64> %1) {
; CHECK-LABEL: smaxmin_v2i64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT:    push {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT:    vmov r1, r0, d3
; CHECK-NEXT:    mov r12, #0
; CHECK-NEXT:    vmov r4, lr, d1
; CHECK-NEXT:    vmov r6, r8, d0
; CHECK-NEXT:    subs r2, r4, r1
; CHECK-NEXT:    mov r3, r0
; CHECK-NEXT:    sbcs r2, lr, r0
; CHECK-NEXT:    mov r2, #0
; CHECK-NEXT:    movwlt r2, #1
; CHECK-NEXT:    cmp r2, #0
; CHECK-NEXT:    mov r2, r1
; CHECK-NEXT:    movne r3, lr
; CHECK-NEXT:    movne r2, r4
; CHECK-NEXT:    vmov.32 d17[0], r2
; CHECK-NEXT:    vmov.32 d17[1], r3
; CHECK-NEXT:    vmov r2, r3, d2
; CHECK-NEXT:    subs r5, r2, r6
; CHECK-NEXT:    sbcs r5, r3, r8
; CHECK-NEXT:    mov r7, r2
; CHECK-NEXT:    mov r5, #0
; CHECK-NEXT:    movwlt r5, #1
; CHECK-NEXT:    cmp r5, #0
; CHECK-NEXT:    movne r7, r6
; CHECK-NEXT:    vmov.32 d18[0], r7
; CHECK-NEXT:    subs r7, r1, r4
; CHECK-NEXT:    sbcs r7, r0, lr
; CHECK-NEXT:    mov r7, #0
; CHECK-NEXT:    movwlt r7, #1
; CHECK-NEXT:    cmp r7, #0
; CHECK-NEXT:    movne r1, r4
; CHECK-NEXT:    vmov.32 d19[0], r1
; CHECK-NEXT:    subs r1, r6, r2
; CHECK-NEXT:    sbcs r1, r8, r3
; CHECK-NEXT:    movwlt r12, #1
; CHECK-NEXT:    cmp r12, #0
; CHECK-NEXT:    movne r2, r6
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    vmov.32 d16[0], r2
; CHECK-NEXT:    movne r1, r8
; CHECK-NEXT:    cmp r7, #0
; CHECK-NEXT:    movne r0, lr
; CHECK-NEXT:    cmp r5, #0
; CHECK-NEXT:    vmov.32 d16[1], r1
; CHECK-NEXT:    movne r3, r8
; CHECK-NEXT:    vmov.32 d19[1], r0
; CHECK-NEXT:    vmov.32 d18[1], r3
; CHECK-NEXT:    vsub.i64 q0, q9, q8
; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, pc}
  %a = tail call <2 x i64> @llvm.smax.v2i64(<2 x i64> %0, <2 x i64> %1)
  %b = tail call <2 x i64> @llvm.smin.v2i64(<2 x i64> %0, <2 x i64> %1)
  %sub = sub <2 x i64> %a, %b
  ret <2 x i64> %sub
}
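
; The same trick works with unsigned max/min, selecting vabd.u. For v2i64
; the umax/umin pair is itself expanded, here via unsigned saturating
; subtracts (vqsub.u64).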

define <16 x i8> @umaxmin_v16i8(<16 x i8> %0, <16 x i8> %1) {
; CHECK-LABEL: umaxmin_v16i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
  %a = tail call <16 x i8> @llvm.umax.v16i8(<16 x i8> %0, <16 x i8> %1)
  %b = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %0, <16 x i8> %1)
  %sub = sub <16 x i8> %a, %b
  ret <16 x i8> %sub
}

define <8 x i16> @umaxmin_v8i16(<8 x i16> %0, <8 x i16> %1) {
; CHECK-LABEL: umaxmin_v8i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
  %a = tail call <8 x i16> @llvm.umax.v8i16(<8 x i16> %0, <8 x i16> %1)
  %b = tail call <8 x i16> @llvm.umin.v8i16(<8 x i16> %0, <8 x i16> %1)
  %sub = sub <8 x i16> %a, %b
  ret <8 x i16> %sub
}

define <4 x i32> @umaxmin_v4i32(<4 x i32> %0, <4 x i32> %1) {
; CHECK-LABEL: umaxmin_v4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
  %a = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> %0, <4 x i32> %1)
  %b = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %0, <4 x i32> %1)
  %sub = sub <4 x i32> %a, %b
  ret <4 x i32> %sub
}

define <2 x i64> @umaxmin_v2i64(<2 x i64> %0, <2 x i64> %1) {
; CHECK-LABEL: umaxmin_v2i64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vqsub.u64 q8, q0, q1
; CHECK-NEXT:    vqsub.u64 q9, q1, q0
; CHECK-NEXT:    vsub.i64 q8, q8, q0
; CHECK-NEXT:    vadd.i64 q9, q0, q9
; CHECK-NEXT:    vadd.i64 q0, q9, q8
; CHECK-NEXT:    bx lr
  %a = tail call <2 x i64> @llvm.umax.v2i64(<2 x i64> %0, <2 x i64> %1)
  %b = tail call <2 x i64> @llvm.umin.v2i64(<2 x i64> %0, <2 x i64> %1)
  %sub = sub <2 x i64> %a, %b
  ret <2 x i64> %sub
}
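
; Same as umaxmin_v16i8, but with the umin operands commuted to check that
; the match is commutative.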

define <16 x i8> @umaxmin_v16i8_com1(<16 x i8> %0, <16 x i8> %1) {
; CHECK-LABEL: umaxmin_v16i8_com1:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
  %a = tail call <16 x i8> @llvm.umax.v16i8(<16 x i8> %0, <16 x i8> %1)
  %b = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %1, <16 x i8> %0)
  %sub = sub <16 x i8> %a, %b
  ret <16 x i8> %sub
}