1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
5 ; trunc(abs(sub(zext(a),zext(b)))) -> abdu(a,b)
; Unsigned absolute difference of two i8 values, written as
; trunc(abs(sub(zext a, zext b))) with an i64 intermediate.
; Expected code: mask %a to 8 bits, subtract the zero-extended low byte of %b
; (uxtb), then negate the 32-bit result when it is negative (cneg ... mi).
8 define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
9 ; CHECK-LABEL: abd_ext_i8:
11 ; CHECK-NEXT: and w8, w0, #0xff
12 ; CHECK-NEXT: sub w8, w8, w1, uxtb
13 ; CHECK-NEXT: cmp w8, #0
14 ; CHECK-NEXT: cneg w0, w8, mi
16 %aext = zext i8 %a to i64
17 %bext = zext i8 %b to i64
18 %sub = sub i64 %aext, %bext
19 %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
20 %trunc = trunc i64 %abs to i8
; Mixed-width variant: %a is i8 and %b is i16, both zero-extended to i64 before
; the sub/abs, and the result is truncated to i8.
; Expected code masks %a to 8 bits but extends %b as a halfword (uxth), then
; uses the same cmp/cneg sequence as the same-width i8 case.
24 define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
25 ; CHECK-LABEL: abd_ext_i8_i16:
27 ; CHECK-NEXT: and w8, w0, #0xff
28 ; CHECK-NEXT: sub w8, w8, w1, uxth
29 ; CHECK-NEXT: cmp w8, #0
30 ; CHECK-NEXT: cneg w0, w8, mi
32 %aext = zext i8 %a to i64
33 %bext = zext i16 %b to i64
34 %sub = sub i64 %aext, %bext
35 %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
36 %trunc = trunc i64 %abs to i8
; Same pattern as abd_ext_i8, except llvm.abs is called with its i1 flag set to
; true (per LangRef, INT_MIN input then yields poison).
; The expected instruction sequence is identical to abd_ext_i8.
40 define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
41 ; CHECK-LABEL: abd_ext_i8_undef:
43 ; CHECK-NEXT: and w8, w0, #0xff
44 ; CHECK-NEXT: sub w8, w8, w1, uxtb
45 ; CHECK-NEXT: cmp w8, #0
46 ; CHECK-NEXT: cneg w0, w8, mi
48 %aext = zext i8 %a to i64
49 %bext = zext i8 %b to i64
50 %sub = sub i64 %aext, %bext
51 %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true)
52 %trunc = trunc i64 %abs to i8
; Unsigned absolute difference of two i16 values via
; trunc(abs(sub(zext a, zext b))) with an i64 intermediate.
; Expected code: mask %a to 16 bits, subtract the zero-extended halfword of %b
; (uxth), then negate when the difference is negative (cneg ... mi).
56 define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
57 ; CHECK-LABEL: abd_ext_i16:
59 ; CHECK-NEXT: and w8, w0, #0xffff
60 ; CHECK-NEXT: sub w8, w8, w1, uxth
61 ; CHECK-NEXT: cmp w8, #0
62 ; CHECK-NEXT: cneg w0, w8, mi
64 %aext = zext i16 %a to i64
65 %bext = zext i16 %b to i64
66 %sub = sub i64 %aext, %bext
67 %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
68 %trunc = trunc i64 %abs to i16
; Mixed-width variant: %a is i16 and %b is i32, both zero-extended to i64; the
; abs of the difference is truncated to i16.
; Expected code masks %a to 16 bits, computes both b-a and a-b in 32 bits, and
; selects a-b when the masked %a is unsigned-higher than %b (csel ... hi).
72 define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
73 ; CHECK-LABEL: abd_ext_i16_i32:
75 ; CHECK-NEXT: and w8, w0, #0xffff
76 ; CHECK-NEXT: sub w9, w1, w8
77 ; CHECK-NEXT: subs w8, w8, w1
78 ; CHECK-NEXT: csel w0, w8, w9, hi
80 %aext = zext i16 %a to i64
81 %bext = zext i32 %b to i64
82 %sub = sub i64 %aext, %bext
83 %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
84 %trunc = trunc i64 %abs to i16
; Same pattern as abd_ext_i16, except llvm.abs is called with its i1 flag set
; to true (per LangRef, INT_MIN input then yields poison).
; The expected instruction sequence is identical to abd_ext_i16.
88 define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
89 ; CHECK-LABEL: abd_ext_i16_undef:
91 ; CHECK-NEXT: and w8, w0, #0xffff
92 ; CHECK-NEXT: sub w8, w8, w1, uxth
93 ; CHECK-NEXT: cmp w8, #0
94 ; CHECK-NEXT: cneg w0, w8, mi
96 %aext = zext i16 %a to i64
97 %bext = zext i16 %b to i64
98 %sub = sub i64 %aext, %bext
99 %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true)
100 %trunc = trunc i64 %abs to i16
; Unsigned absolute difference of two i32 values via
; trunc(abs(sub(zext a, zext b))) with an i64 intermediate.
; Expected code stays in 32-bit registers: compute b-a and a-b (setting flags),
; then select a-b when %a is unsigned-higher than %b (csel ... hi).
104 define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
105 ; CHECK-LABEL: abd_ext_i32:
107 ; CHECK-NEXT: sub w8, w1, w0
108 ; CHECK-NEXT: subs w9, w0, w1
109 ; CHECK-NEXT: csel w0, w9, w8, hi
111 %aext = zext i32 %a to i64
112 %bext = zext i32 %b to i64
113 %sub = sub i64 %aext, %bext
114 %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
115 %trunc = trunc i64 %abs to i32
; Mixed-width variant: %a is i32 and %b is i16, both zero-extended to i64; the
; abs of the difference is truncated to i32.
; Expected code masks %b to 16 bits, computes both b-a and a-b in 32 bits, and
; selects a-b when %a is unsigned-higher than the masked %b (csel ... hi).
119 define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
120 ; CHECK-LABEL: abd_ext_i32_i16:
122 ; CHECK-NEXT: and w8, w1, #0xffff
123 ; CHECK-NEXT: sub w9, w8, w0
124 ; CHECK-NEXT: subs w8, w0, w8
125 ; CHECK-NEXT: csel w0, w8, w9, hi
127 %aext = zext i32 %a to i64
128 %bext = zext i16 %b to i64
129 %sub = sub i64 %aext, %bext
130 %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
131 %trunc = trunc i64 %abs to i32
; Same pattern as abd_ext_i32, except llvm.abs is called with its i1 flag set
; to true (per LangRef, INT_MIN input then yields poison).
; The expected instruction sequence is identical to abd_ext_i32.
135 define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
136 ; CHECK-LABEL: abd_ext_i32_undef:
138 ; CHECK-NEXT: sub w8, w1, w0
139 ; CHECK-NEXT: subs w9, w0, w1
140 ; CHECK-NEXT: csel w0, w9, w8, hi
142 %aext = zext i32 %a to i64
143 %bext = zext i32 %b to i64
144 %sub = sub i64 %aext, %bext
145 %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true)
146 %trunc = trunc i64 %abs to i32
; Unsigned absolute difference of two i64 values; the IR widens to i128 for the
; sub/abs and truncates back to i64.
; Expected code needs no 128-bit arithmetic: it computes b-a and a-b in 64-bit
; registers and selects a-b when %a is unsigned-higher than %b (csel ... hi).
150 define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
151 ; CHECK-LABEL: abd_ext_i64:
153 ; CHECK-NEXT: sub x8, x1, x0
154 ; CHECK-NEXT: subs x9, x0, x1
155 ; CHECK-NEXT: csel x0, x9, x8, hi
157 %aext = zext i64 %a to i128
158 %bext = zext i64 %b to i128
159 %sub = sub i128 %aext, %bext
160 %abs = call i128 @llvm.abs.i128(i128 %sub, i1 false)
161 %trunc = trunc i128 %abs to i64
; Same pattern as abd_ext_i64, except llvm.abs is called with its i1 flag set
; to true (per LangRef, INT_MIN input then yields poison).
; The expected instruction sequence is identical to abd_ext_i64.
165 define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
166 ; CHECK-LABEL: abd_ext_i64_undef:
168 ; CHECK-NEXT: sub x8, x1, x0
169 ; CHECK-NEXT: subs x9, x0, x1
170 ; CHECK-NEXT: csel x0, x9, x8, hi
172 %aext = zext i64 %a to i128
173 %bext = zext i64 %b to i128
174 %sub = sub i128 %aext, %bext
175 %abs = call i128 @llvm.abs.i128(i128 %sub, i1 true)
176 %trunc = trunc i128 %abs to i64
; Unsigned absolute difference of two i128 values; the IR widens to i256 for
; the sub/abs and truncates back to i128.
; Expected code: a 128-bit a-b over two registers (subs/sbcs), a 0/-1 mask
; built from the borrow (cset lo + sbfx of bit 0), then a conditional negate
; done as (x ^ mask) - mask across both halves (eor/eor/subs/sbc).
180 define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
181 ; CHECK-LABEL: abd_ext_i128:
183 ; CHECK-NEXT: subs x8, x0, x2
184 ; CHECK-NEXT: sbcs x9, x1, x3
185 ; CHECK-NEXT: cset w10, lo
186 ; CHECK-NEXT: sbfx x10, x10, #0, #1
187 ; CHECK-NEXT: eor x8, x8, x10
188 ; CHECK-NEXT: eor x9, x9, x10
189 ; CHECK-NEXT: subs x0, x8, x10
190 ; CHECK-NEXT: sbc x1, x9, x10
192 %aext = zext i128 %a to i256
193 %bext = zext i128 %b to i256
194 %sub = sub i256 %aext, %bext
195 %abs = call i256 @llvm.abs.i256(i256 %sub, i1 false)
196 %trunc = trunc i256 %abs to i128
; Same pattern as abd_ext_i128, except llvm.abs is called with its i1 flag set
; to true (per LangRef, INT_MIN input then yields poison).
; The expected instruction sequence is identical to abd_ext_i128.
200 define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
201 ; CHECK-LABEL: abd_ext_i128_undef:
203 ; CHECK-NEXT: subs x8, x0, x2
204 ; CHECK-NEXT: sbcs x9, x1, x3
205 ; CHECK-NEXT: cset w10, lo
206 ; CHECK-NEXT: sbfx x10, x10, #0, #1
207 ; CHECK-NEXT: eor x8, x8, x10
208 ; CHECK-NEXT: eor x9, x9, x10
209 ; CHECK-NEXT: subs x0, x8, x10
210 ; CHECK-NEXT: sbc x1, x9, x10
212 %aext = zext i128 %a to i256
213 %bext = zext i128 %b to i256
214 %sub = sub i256 %aext, %bext
215 %abs = call i256 @llvm.abs.i256(i256 %sub, i1 true)
216 %trunc = trunc i256 %abs to i128
221 ; sub(umax(a,b),umin(a,b)) -> abdu(a,b)
; Unsigned absolute difference of two i8 values written as umax(a,b) - umin(a,b).
; Expected code: mask %a to 8 bits, subtract the zero-extended low byte of %b
; (uxtb), then negate when the difference is negative (cneg ... mi).
224 define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
225 ; CHECK-LABEL: abd_minmax_i8:
227 ; CHECK-NEXT: and w8, w0, #0xff
228 ; CHECK-NEXT: sub w8, w8, w1, uxtb
229 ; CHECK-NEXT: cmp w8, #0
230 ; CHECK-NEXT: cneg w0, w8, mi
232 %min = call i8 @llvm.umin.i8(i8 %a, i8 %b)
233 %max = call i8 @llvm.umax.i8(i8 %a, i8 %b)
234 %sub = sub i8 %max, %min
; Unsigned absolute difference of two i16 values written as umax(a,b) - umin(a,b).
; Expected code: mask %a to 16 bits, subtract the zero-extended halfword of %b
; (uxth), then negate when the difference is negative (cneg ... mi).
238 define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
239 ; CHECK-LABEL: abd_minmax_i16:
241 ; CHECK-NEXT: and w8, w0, #0xffff
242 ; CHECK-NEXT: sub w8, w8, w1, uxth
243 ; CHECK-NEXT: cmp w8, #0
244 ; CHECK-NEXT: cneg w0, w8, mi
246 %min = call i16 @llvm.umin.i16(i16 %a, i16 %b)
247 %max = call i16 @llvm.umax.i16(i16 %a, i16 %b)
248 %sub = sub i16 %max, %min
; Unsigned absolute difference of two i32 values written as umax(a,b) - umin(a,b).
; Expected code computes b-a and a-b (setting flags) and selects a-b when %a is
; unsigned-higher than %b (csel ... hi); no separate min/max is materialized.
252 define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
253 ; CHECK-LABEL: abd_minmax_i32:
255 ; CHECK-NEXT: sub w8, w1, w0
256 ; CHECK-NEXT: subs w9, w0, w1
257 ; CHECK-NEXT: csel w0, w9, w8, hi
259 %min = call i32 @llvm.umin.i32(i32 %a, i32 %b)
260 %max = call i32 @llvm.umax.i32(i32 %a, i32 %b)
261 %sub = sub i32 %max, %min
; Unsigned absolute difference of two i64 values written as umax(a,b) - umin(a,b).
; Expected code computes b-a and a-b in 64-bit registers and selects a-b when
; %a is unsigned-higher than %b (csel ... hi).
265 define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
266 ; CHECK-LABEL: abd_minmax_i64:
268 ; CHECK-NEXT: sub x8, x1, x0
269 ; CHECK-NEXT: subs x9, x0, x1
270 ; CHECK-NEXT: csel x0, x9, x8, hi
272 %min = call i64 @llvm.umin.i64(i64 %a, i64 %b)
273 %max = call i64 @llvm.umax.i64(i64 %a, i64 %b)
274 %sub = sub i64 %max, %min
; Unsigned absolute difference of two i128 values written as umax(a,b) - umin(a,b).
; Expected code: a 128-bit a-b over two registers (subs/sbcs), a 0/-1 mask
; built from the borrow (cset lo + sbfx of bit 0), then a conditional negate
; done as (x ^ mask) - mask across both halves (eor/eor/subs/sbc).
278 define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
279 ; CHECK-LABEL: abd_minmax_i128:
281 ; CHECK-NEXT: subs x8, x0, x2
282 ; CHECK-NEXT: sbcs x9, x1, x3
283 ; CHECK-NEXT: cset w10, lo
284 ; CHECK-NEXT: sbfx x10, x10, #0, #1
285 ; CHECK-NEXT: eor x8, x8, x10
286 ; CHECK-NEXT: eor x9, x9, x10
287 ; CHECK-NEXT: subs x0, x8, x10
288 ; CHECK-NEXT: sbc x1, x9, x10
290 %min = call i128 @llvm.umin.i128(i128 %a, i128 %b)
291 %max = call i128 @llvm.umax.i128(i128 %a, i128 %b)
292 %sub = sub i128 %max, %min
297 ; select(icmp(a,b),sub(a,b),sub(b,a)) -> abdu(a,b)
; Compare-and-select form for i8: picks %ab when a >u b (icmp ugt), else %ba.
; NOTE(review): %ab and %ba are not defined in the lines visible here;
; presumably sub a,b and sub b,a as in abd_cmp_i128 - confirm.
; Expected code: mask %a to 8 bits, subtract the zero-extended low byte of %b
; (uxtb), then negate when the difference is negative (cneg ... mi).
300 define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
301 ; CHECK-LABEL: abd_cmp_i8:
303 ; CHECK-NEXT: and w8, w0, #0xff
304 ; CHECK-NEXT: sub w8, w8, w1, uxtb
305 ; CHECK-NEXT: cmp w8, #0
306 ; CHECK-NEXT: cneg w0, w8, mi
308 %cmp = icmp ugt i8 %a, %b
311 %sel = select i1 %cmp, i8 %ab, i8 %ba
; Compare-and-select form for i16: picks %ab when a >=u b (icmp uge), else %ba.
; NOTE(review): %ab and %ba are not defined in the lines visible here;
; presumably sub a,b and sub b,a as in abd_cmp_i128 - confirm.
; Expected code: mask %a to 16 bits, subtract the zero-extended halfword of %b
; (uxth), then negate when the difference is negative (cneg ... mi).
315 define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
316 ; CHECK-LABEL: abd_cmp_i16:
318 ; CHECK-NEXT: and w8, w0, #0xffff
319 ; CHECK-NEXT: sub w8, w8, w1, uxth
320 ; CHECK-NEXT: cmp w8, #0
321 ; CHECK-NEXT: cneg w0, w8, mi
323 %cmp = icmp uge i16 %a, %b
326 %sel = select i1 %cmp, i16 %ab, i16 %ba
; Compare-and-select form for i32 with the predicate and select arms swapped:
; picks %ba when a <u b (icmp ult), else %ab.
; NOTE(review): %ab and %ba are not defined in the lines visible here;
; presumably sub a,b and sub b,a as in abd_cmp_i128 - confirm.
; Expected code computes b-a and a-b (setting flags) and selects a-b when %a is
; unsigned-higher than %b (csel ... hi).
330 define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
331 ; CHECK-LABEL: abd_cmp_i32:
333 ; CHECK-NEXT: sub w8, w1, w0
334 ; CHECK-NEXT: subs w9, w0, w1
335 ; CHECK-NEXT: csel w0, w9, w8, hi
337 %cmp = icmp ult i32 %a, %b
340 %sel = select i1 %cmp, i32 %ba, i32 %ab
; Compare-and-select form for i64: picks %ab when a >=u b (icmp uge), else %ba.
; NOTE(review): %ab and %ba are not defined in the lines visible here;
; presumably sub a,b and sub b,a as in abd_cmp_i128 - confirm.
; Expected code computes b-a and a-b in 64-bit registers and selects a-b when
; %a is unsigned-higher than %b (csel ... hi).
344 define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
345 ; CHECK-LABEL: abd_cmp_i64:
347 ; CHECK-NEXT: sub x8, x1, x0
348 ; CHECK-NEXT: subs x9, x0, x1
349 ; CHECK-NEXT: csel x0, x9, x8, hi
351 %cmp = icmp uge i64 %a, %b
354 %sel = select i1 %cmp, i64 %ab, i64 %ba
; Compare-and-select form for i128: computes a-b and b-a and selects a-b when
; a >=u b (icmp uge), otherwise b-a.
; Expected code: a 128-bit a-b over two registers (subs/sbcs), a 0/-1 mask
; built from the borrow (cset lo + sbfx of bit 0), then a conditional negate
; done as (x ^ mask) - mask across both halves (eor/eor/subs/sbc).
358 define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
359 ; CHECK-LABEL: abd_cmp_i128:
361 ; CHECK-NEXT: subs x8, x0, x2
362 ; CHECK-NEXT: sbcs x9, x1, x3
363 ; CHECK-NEXT: cset w10, lo
364 ; CHECK-NEXT: sbfx x10, x10, #0, #1
365 ; CHECK-NEXT: eor x8, x8, x10
366 ; CHECK-NEXT: eor x9, x9, x10
367 ; CHECK-NEXT: subs x0, x8, x10
368 ; CHECK-NEXT: sbc x1, x9, x10
370 %cmp = icmp uge i128 %a, %b
371 %ab = sub i128 %a, %b
372 %ba = sub i128 %b, %a
373 %sel = select i1 %cmp, i128 %ab, i128 %ba
; Scalar abs(sub(zext a, zext b)) on i16 inputs (i32 intermediate, zero-extended
; to i64), added to a vector.reduce.add over an all-zero <32 x i64>.
; Expected code interleaves the scalar and/sub/cmp/cneg sequence with the
; vector side (movi of zero, addp, fmov) and finishes with a 64-bit add.
; NOTE(review): unlike the other functions here this one has no nounwind
; attribute; the name suggests it guards a vector-legalization case - confirm.
381 define i64 @vector_legalized(i16 %a, i16 %b) {
382 ; CHECK-LABEL: vector_legalized:
384 ; CHECK-NEXT: movi v0.2d, #0000000000000000
385 ; CHECK-NEXT: and w8, w0, #0xffff
386 ; CHECK-NEXT: sub w8, w8, w1, uxth
387 ; CHECK-NEXT: cmp w8, #0
388 ; CHECK-NEXT: addp d0, v0.2d
389 ; CHECK-NEXT: cneg w8, w8, mi
390 ; CHECK-NEXT: fmov x9, d0
391 ; CHECK-NEXT: add x0, x9, x8
393 %ea = zext i16 %a to i32
394 %eb = zext i16 %b to i32
395 %s = sub i32 %ea, %eb
396 %ab = call i32 @llvm.abs.i32(i32 %s, i1 false)
397 %e = zext i32 %ab to i64
398 %red = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> zeroinitializer)
399 %z = add i64 %red, %e
404 ; sub(select(icmp(a,b),a,b),select(icmp(a,b),b,a)) -> abdu(a,b)
; Select-based min/max form for i8: with cmp = a <u b, %ab is the smaller and
; %ba the larger operand, and the result is %ba - %ab.
; Expected code: mask %a to 8 bits, subtract the zero-extended low byte of %b
; (uxtb), then negate when the difference is negative (cneg ... mi).
407 define i8 @abd_select_i8(i8 %a, i8 %b) nounwind {
408 ; CHECK-LABEL: abd_select_i8:
410 ; CHECK-NEXT: and w8, w0, #0xff
411 ; CHECK-NEXT: sub w8, w8, w1, uxtb
412 ; CHECK-NEXT: cmp w8, #0
413 ; CHECK-NEXT: cneg w0, w8, mi
415 %cmp = icmp ult i8 %a, %b
416 %ab = select i1 %cmp, i8 %a, i8 %b
417 %ba = select i1 %cmp, i8 %b, i8 %a
418 %sub = sub i8 %ba, %ab
; Select-based min/max form for i16: with cmp = a <=u b, %ab is the smaller and
; %ba the larger operand, and the result is %ba - %ab.
; Expected code: mask %a to 16 bits, subtract the zero-extended halfword of %b
; (uxth), then negate when the difference is negative (cneg ... mi).
422 define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
423 ; CHECK-LABEL: abd_select_i16:
425 ; CHECK-NEXT: and w8, w0, #0xffff
426 ; CHECK-NEXT: sub w8, w8, w1, uxth
427 ; CHECK-NEXT: cmp w8, #0
428 ; CHECK-NEXT: cneg w0, w8, mi
430 %cmp = icmp ule i16 %a, %b
431 %ab = select i1 %cmp, i16 %a, i16 %b
432 %ba = select i1 %cmp, i16 %b, i16 %a
433 %sub = sub i16 %ba, %ab
; Select-based min/max form for i32: with cmp = a >u b, %ab is the larger and
; %ba the smaller operand, and the result is %ab - %ba.
; Expected code computes b-a and a-b (setting flags) and selects a-b when %a is
; unsigned-higher than %b (csel ... hi).
437 define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
438 ; CHECK-LABEL: abd_select_i32:
440 ; CHECK-NEXT: sub w8, w1, w0
441 ; CHECK-NEXT: subs w9, w0, w1
442 ; CHECK-NEXT: csel w0, w9, w8, hi
444 %cmp = icmp ugt i32 %a, %b
445 %ab = select i1 %cmp, i32 %a, i32 %b
446 %ba = select i1 %cmp, i32 %b, i32 %a
447 %sub = sub i32 %ab, %ba
; Select-based min/max form for i64: with cmp = a >=u b, %ab is the larger and
; %ba the smaller operand, and the result is %ab - %ba.
; Expected code computes b-a and a-b in 64-bit registers and selects a-b when
; %a is unsigned-higher than %b (csel ... hi).
451 define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
452 ; CHECK-LABEL: abd_select_i64:
454 ; CHECK-NEXT: sub x8, x1, x0
455 ; CHECK-NEXT: subs x9, x0, x1
456 ; CHECK-NEXT: csel x0, x9, x8, hi
458 %cmp = icmp uge i64 %a, %b
459 %ab = select i1 %cmp, i64 %a, i64 %b
460 %ba = select i1 %cmp, i64 %b, i64 %a
461 %sub = sub i64 %ab, %ba
; Select-based min/max form for i128: with cmp = a <u b, %ab is the smaller and
; %ba the larger operand, and the result is %ba - %ab.
; Expected code: a 128-bit a-b over two registers (subs/sbcs), a 0/-1 mask
; built from the borrow (cset lo + sbfx of bit 0), then a conditional negate
; done as (x ^ mask) - mask across both halves (eor/eor/subs/sbc).
465 define i128 @abd_select_i128(i128 %a, i128 %b) nounwind {
466 ; CHECK-LABEL: abd_select_i128:
468 ; CHECK-NEXT: subs x8, x0, x2
469 ; CHECK-NEXT: sbcs x9, x1, x3
470 ; CHECK-NEXT: cset w10, lo
471 ; CHECK-NEXT: sbfx x10, x10, #0, #1
472 ; CHECK-NEXT: eor x8, x8, x10
473 ; CHECK-NEXT: eor x9, x9, x10
474 ; CHECK-NEXT: subs x0, x8, x10
475 ; CHECK-NEXT: sbc x1, x9, x10
477 %cmp = icmp ult i128 %a, %b
478 %ab = select i1 %cmp, i128 %a, i128 %b
479 %ba = select i1 %cmp, i128 %b, i128 %a
480 %sub = sub i128 %ba, %ab
; Declarations of the abs/umax/umin intrinsics used by the functions in this file.
; NOTE(review): the functions here also call llvm.abs.i256, llvm.umax.i128,
; llvm.umin.i128 and llvm.vector.reduce.add.v32i64, none of which is declared
; in this list - presumably the IR parser auto-declares intrinsics; confirm
; before running against an older toolchain.
484 declare i8 @llvm.abs.i8(i8, i1)
485 declare i16 @llvm.abs.i16(i16, i1)
486 declare i32 @llvm.abs.i32(i32, i1)
487 declare i64 @llvm.abs.i64(i64, i1)
488 declare i128 @llvm.abs.i128(i128, i1)
490 declare i8 @llvm.umax.i8(i8, i8)
491 declare i16 @llvm.umax.i16(i16, i16)
492 declare i32 @llvm.umax.i32(i32, i32)
493 declare i64 @llvm.umax.i64(i64, i64)
495 declare i8 @llvm.umin.i8(i8, i8)
496 declare i16 @llvm.umin.i16(i16, i16)
497 declare i32 @llvm.umin.i32(i32, i32)
498 declare i64 @llvm.umin.i64(i64, i64)