; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv32 | FileCheck %s --check-prefixes=RV32I
; RUN: llc < %s -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64I
; RUN: llc < %s -mtriple=riscv32 -mattr=+zbb | FileCheck %s --check-prefixes=ZBB,RV32ZBB
; RUN: llc < %s -mtriple=riscv64 -mattr=+zbb | FileCheck %s --check-prefixes=ZBB,RV64ZBB

;
; trunc(abs(sub(sext(a),sext(b)))) -> abds(a,b)
;
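; Illustrative note (an aside, not a checked test): sign-extending both
; operands to a wider type means the subtract below can never wrap, so the
; combiner may form abds directly. With Zbb this lowers to
; smax(a,b) - smin(a,b) (sext + min/max/sub); base RV32I/RV64I fall back to
; the classic sub + srai + xor + sub absolute-value sequence, as checked below.
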
define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
; RV32I-LABEL: abd_ext_i8:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a1, a1, 24
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: srai a1, a1, 24
; RV32I-NEXT: srai a0, a0, 24
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_ext_i8:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a1, 56
; RV64I-NEXT: slli a0, a0, 56
; RV64I-NEXT: srai a1, a1, 56
; RV64I-NEXT: srai a0, a0, 56
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: srai a1, a0, 63
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: ret
;
; ZBB-LABEL: abd_ext_i8:
; ZBB: # %bb.0:
; ZBB-NEXT: sext.b a1, a1
; ZBB-NEXT: sext.b a0, a0
; ZBB-NEXT: min a2, a0, a1
; ZBB-NEXT: max a0, a0, a1
; ZBB-NEXT: sub a0, a0, a2
; ZBB-NEXT: ret
  %aext = sext i8 %a to i64
  %bext = sext i8 %b to i64
  %sub = sub i64 %aext, %bext
  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
  %trunc = trunc i64 %abs to i8
  ret i8 %trunc
}

define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
; RV32I-LABEL: abd_ext_i8_i16:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a1, a1, 16
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: srai a1, a1, 16
; RV32I-NEXT: srai a0, a0, 24
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_ext_i8_i16:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a1, 48
; RV64I-NEXT: slli a0, a0, 56
; RV64I-NEXT: srai a1, a1, 48
; RV64I-NEXT: srai a0, a0, 56
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: srai a1, a0, 63
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: ret
;
; ZBB-LABEL: abd_ext_i8_i16:
; ZBB: # %bb.0:
; ZBB-NEXT: sext.h a1, a1
; ZBB-NEXT: sext.b a0, a0
; ZBB-NEXT: min a2, a0, a1
; ZBB-NEXT: max a0, a0, a1
; ZBB-NEXT: sub a0, a0, a2
; ZBB-NEXT: ret
  %aext = sext i8 %a to i64
  %bext = sext i16 %b to i64
  %sub = sub i64 %aext, %bext
  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
  %trunc = trunc i64 %abs to i8
  ret i8 %trunc
}

define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
; RV32I-LABEL: abd_ext_i8_undef:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a1, a1, 24
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: srai a1, a1, 24
; RV32I-NEXT: srai a0, a0, 24
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_ext_i8_undef:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a1, 56
; RV64I-NEXT: slli a0, a0, 56
; RV64I-NEXT: srai a1, a1, 56
; RV64I-NEXT: srai a0, a0, 56
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: srai a1, a0, 63
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: ret
;
; ZBB-LABEL: abd_ext_i8_undef:
; ZBB: # %bb.0:
; ZBB-NEXT: sext.b a1, a1
; ZBB-NEXT: sext.b a0, a0
; ZBB-NEXT: min a2, a0, a1
; ZBB-NEXT: max a0, a0, a1
; ZBB-NEXT: sub a0, a0, a2
; ZBB-NEXT: ret
  %aext = sext i8 %a to i64
  %bext = sext i8 %b to i64
  %sub = sub i64 %aext, %bext
  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true)
  %trunc = trunc i64 %abs to i8
  ret i8 %trunc
}

define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
; RV32I-LABEL: abd_ext_i16:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a1, a1, 16
; RV32I-NEXT: slli a0, a0, 16
; RV32I-NEXT: srai a1, a1, 16
; RV32I-NEXT: srai a0, a0, 16
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_ext_i16:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a1, 48
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srai a1, a1, 48
; RV64I-NEXT: srai a0, a0, 48
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: srai a1, a0, 63
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: ret
;
; ZBB-LABEL: abd_ext_i16:
; ZBB: # %bb.0:
; ZBB-NEXT: sext.h a1, a1
; ZBB-NEXT: sext.h a0, a0
; ZBB-NEXT: min a2, a0, a1
; ZBB-NEXT: max a0, a0, a1
; ZBB-NEXT: sub a0, a0, a2
; ZBB-NEXT: ret
  %aext = sext i16 %a to i64
  %bext = sext i16 %b to i64
  %sub = sub i64 %aext, %bext
  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
  %trunc = trunc i64 %abs to i16
  ret i16 %trunc
}

define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; RV32I-LABEL: abd_ext_i16_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a0, a0, 16
; RV32I-NEXT: srai a0, a0, 16
; RV32I-NEXT: blt a1, a0, .LBB4_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sub a0, a1, a0
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB4_2:
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_ext_i16_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: sext.w a1, a1
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srai a0, a0, 48
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: srai a1, a0, 63
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: abd_ext_i16_i32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: sext.h a0, a0
; RV32ZBB-NEXT: min a2, a0, a1
; RV32ZBB-NEXT: max a0, a0, a1
; RV32ZBB-NEXT: sub a0, a0, a2
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: abd_ext_i16_i32:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: sext.w a1, a1
; RV64ZBB-NEXT: sext.h a0, a0
; RV64ZBB-NEXT: min a2, a0, a1
; RV64ZBB-NEXT: max a0, a0, a1
; RV64ZBB-NEXT: sub a0, a0, a2
; RV64ZBB-NEXT: ret
  %aext = sext i16 %a to i64
  %bext = sext i32 %b to i64
  %sub = sub i64 %aext, %bext
  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
  %trunc = trunc i64 %abs to i16
  ret i16 %trunc
}

define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
; RV32I-LABEL: abd_ext_i16_undef:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a1, a1, 16
; RV32I-NEXT: slli a0, a0, 16
; RV32I-NEXT: srai a1, a1, 16
; RV32I-NEXT: srai a0, a0, 16
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_ext_i16_undef:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a1, 48
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srai a1, a1, 48
; RV64I-NEXT: srai a0, a0, 48
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: srai a1, a0, 63
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: ret
;
; ZBB-LABEL: abd_ext_i16_undef:
; ZBB: # %bb.0:
; ZBB-NEXT: sext.h a1, a1
; ZBB-NEXT: sext.h a0, a0
; ZBB-NEXT: min a2, a0, a1
; ZBB-NEXT: max a0, a0, a1
; ZBB-NEXT: sub a0, a0, a2
; ZBB-NEXT: ret
  %aext = sext i16 %a to i64
  %bext = sext i16 %b to i64
  %sub = sub i64 %aext, %bext
  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true)
  %trunc = trunc i64 %abs to i16
  ret i16 %trunc
}

define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
; RV32I-LABEL: abd_ext_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: blt a1, a0, .LBB6_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sub a0, a1, a0
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB6_2:
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_ext_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: sext.w a1, a1
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: srai a1, a0, 63
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: abd_ext_i32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: min a2, a0, a1
; RV32ZBB-NEXT: max a0, a0, a1
; RV32ZBB-NEXT: sub a0, a0, a2
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: abd_ext_i32:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: sext.w a1, a1
; RV64ZBB-NEXT: sext.w a0, a0
; RV64ZBB-NEXT: min a2, a0, a1
; RV64ZBB-NEXT: max a0, a0, a1
; RV64ZBB-NEXT: sub a0, a0, a2
; RV64ZBB-NEXT: ret
  %aext = sext i32 %a to i64
  %bext = sext i32 %b to i64
  %sub = sub i64 %aext, %bext
  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
  %trunc = trunc i64 %abs to i32
  ret i32 %trunc
}

define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
; RV32I-LABEL: abd_ext_i32_i16:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a1, a1, 16
; RV32I-NEXT: srai a1, a1, 16
; RV32I-NEXT: blt a1, a0, .LBB7_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sub a0, a1, a0
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB7_2:
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_ext_i32_i16:
; RV64I: # %bb.0:
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: slli a1, a1, 48
; RV64I-NEXT: srai a1, a1, 48
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: srai a1, a0, 63
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: abd_ext_i32_i16:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: sext.h a1, a1
; RV32ZBB-NEXT: min a2, a0, a1
; RV32ZBB-NEXT: max a0, a0, a1
; RV32ZBB-NEXT: sub a0, a0, a2
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: abd_ext_i32_i16:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: sext.w a0, a0
; RV64ZBB-NEXT: sext.h a1, a1
; RV64ZBB-NEXT: min a2, a0, a1
; RV64ZBB-NEXT: max a0, a0, a1
; RV64ZBB-NEXT: sub a0, a0, a2
; RV64ZBB-NEXT: ret
  %aext = sext i32 %a to i64
  %bext = sext i16 %b to i64
  %sub = sub i64 %aext, %bext
  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
  %trunc = trunc i64 %abs to i32
  ret i32 %trunc
}

define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
; RV32I-LABEL: abd_ext_i32_undef:
; RV32I: # %bb.0:
; RV32I-NEXT: blt a1, a0, .LBB8_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sub a0, a1, a0
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB8_2:
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_ext_i32_undef:
; RV64I: # %bb.0:
; RV64I-NEXT: sext.w a1, a1
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: srai a1, a0, 63
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: abd_ext_i32_undef:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: min a2, a0, a1
; RV32ZBB-NEXT: max a0, a0, a1
; RV32ZBB-NEXT: sub a0, a0, a2
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: abd_ext_i32_undef:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: sext.w a1, a1
; RV64ZBB-NEXT: sext.w a0, a0
; RV64ZBB-NEXT: min a2, a0, a1
; RV64ZBB-NEXT: max a0, a0, a1
; RV64ZBB-NEXT: sub a0, a0, a2
; RV64ZBB-NEXT: ret
  %aext = sext i32 %a to i64
  %bext = sext i32 %b to i64
  %sub = sub i64 %aext, %bext
  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true)
  %trunc = trunc i64 %abs to i32
  ret i32 %trunc
}

define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
; RV32I-LABEL: abd_ext_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: sltu a4, a2, a0
; RV32I-NEXT: mv a5, a4
; RV32I-NEXT: beq a1, a3, .LBB9_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: slt a5, a3, a1
; RV32I-NEXT: .LBB9_2:
; RV32I-NEXT: bnez a5, .LBB9_4
; RV32I-NEXT: # %bb.3:
; RV32I-NEXT: sub a1, a3, a1
; RV32I-NEXT: sub a1, a1, a4
; RV32I-NEXT: sub a0, a2, a0
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB9_4:
; RV32I-NEXT: sltu a4, a0, a2
; RV32I-NEXT: sub a1, a1, a3
; RV32I-NEXT: sub a1, a1, a4
; RV32I-NEXT: sub a0, a0, a2
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_ext_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: blt a1, a0, .LBB9_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: sub a0, a1, a0
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB9_2:
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: abd_ext_i64:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: sltu a4, a2, a0
; RV32ZBB-NEXT: mv a5, a4
; RV32ZBB-NEXT: beq a1, a3, .LBB9_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: slt a5, a3, a1
; RV32ZBB-NEXT: .LBB9_2:
; RV32ZBB-NEXT: bnez a5, .LBB9_4
; RV32ZBB-NEXT: # %bb.3:
; RV32ZBB-NEXT: sub a1, a3, a1
; RV32ZBB-NEXT: sub a1, a1, a4
; RV32ZBB-NEXT: sub a0, a2, a0
; RV32ZBB-NEXT: ret
; RV32ZBB-NEXT: .LBB9_4:
; RV32ZBB-NEXT: sltu a4, a0, a2
; RV32ZBB-NEXT: sub a1, a1, a3
; RV32ZBB-NEXT: sub a1, a1, a4
; RV32ZBB-NEXT: sub a0, a0, a2
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: abd_ext_i64:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: min a2, a0, a1
; RV64ZBB-NEXT: max a0, a0, a1
; RV64ZBB-NEXT: sub a0, a0, a2
; RV64ZBB-NEXT: ret
  %aext = sext i64 %a to i128
  %bext = sext i64 %b to i128
  %sub = sub i128 %aext, %bext
  %abs = call i128 @llvm.abs.i128(i128 %sub, i1 false)
  %trunc = trunc i128 %abs to i64
  ret i64 %trunc
}

define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
; RV32I-LABEL: abd_ext_i64_undef:
; RV32I: # %bb.0:
; RV32I-NEXT: sltu a4, a2, a0
; RV32I-NEXT: mv a5, a4
; RV32I-NEXT: beq a1, a3, .LBB10_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: slt a5, a3, a1
; RV32I-NEXT: .LBB10_2:
; RV32I-NEXT: bnez a5, .LBB10_4
; RV32I-NEXT: # %bb.3:
; RV32I-NEXT: sub a1, a3, a1
; RV32I-NEXT: sub a1, a1, a4
; RV32I-NEXT: sub a0, a2, a0
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB10_4:
; RV32I-NEXT: sltu a4, a0, a2
; RV32I-NEXT: sub a1, a1, a3
; RV32I-NEXT: sub a1, a1, a4
; RV32I-NEXT: sub a0, a0, a2
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_ext_i64_undef:
; RV64I: # %bb.0:
; RV64I-NEXT: blt a1, a0, .LBB10_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: sub a0, a1, a0
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB10_2:
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: abd_ext_i64_undef:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: sltu a4, a2, a0
; RV32ZBB-NEXT: mv a5, a4
; RV32ZBB-NEXT: beq a1, a3, .LBB10_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: slt a5, a3, a1
; RV32ZBB-NEXT: .LBB10_2:
; RV32ZBB-NEXT: bnez a5, .LBB10_4
; RV32ZBB-NEXT: # %bb.3:
; RV32ZBB-NEXT: sub a1, a3, a1
; RV32ZBB-NEXT: sub a1, a1, a4
; RV32ZBB-NEXT: sub a0, a2, a0
; RV32ZBB-NEXT: ret
; RV32ZBB-NEXT: .LBB10_4:
; RV32ZBB-NEXT: sltu a4, a0, a2
; RV32ZBB-NEXT: sub a1, a1, a3
; RV32ZBB-NEXT: sub a1, a1, a4
; RV32ZBB-NEXT: sub a0, a0, a2
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: abd_ext_i64_undef:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: min a2, a0, a1
; RV64ZBB-NEXT: max a0, a0, a1
; RV64ZBB-NEXT: sub a0, a0, a2
; RV64ZBB-NEXT: ret
  %aext = sext i64 %a to i128
  %bext = sext i64 %b to i128
  %sub = sub i128 %aext, %bext
  %abs = call i128 @llvm.abs.i128(i128 %sub, i1 true)
  %trunc = trunc i128 %abs to i64
  ret i64 %trunc
}

define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: abd_ext_i128:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a3, 0(a1)
; RV32I-NEXT: lw a4, 4(a1)
; RV32I-NEXT: lw a6, 8(a1)
; RV32I-NEXT: lw t0, 12(a1)
; RV32I-NEXT: lw a7, 8(a2)
; RV32I-NEXT: lw t1, 12(a2)
; RV32I-NEXT: lw a5, 0(a2)
; RV32I-NEXT: lw a1, 4(a2)
; RV32I-NEXT: sltu a2, a7, a6
; RV32I-NEXT: mv t4, a2
; RV32I-NEXT: beq t0, t1, .LBB11_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: slt t4, t1, t0
; RV32I-NEXT: .LBB11_2:
; RV32I-NEXT: sltu t2, a5, a3
; RV32I-NEXT: sltu t5, a1, a4
; RV32I-NEXT: mv t3, t2
; RV32I-NEXT: beq a4, a1, .LBB11_4
; RV32I-NEXT: # %bb.3:
; RV32I-NEXT: mv t3, t5
; RV32I-NEXT: .LBB11_4:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: xor t6, t0, t1
; RV32I-NEXT: xor s0, a6, a7
; RV32I-NEXT: or t6, s0, t6
; RV32I-NEXT: beqz t6, .LBB11_6
; RV32I-NEXT: # %bb.5:
; RV32I-NEXT: mv t3, t4
; RV32I-NEXT: .LBB11_6:
; RV32I-NEXT: mv t4, t2
; RV32I-NEXT: beq a1, a4, .LBB11_8
; RV32I-NEXT: # %bb.7:
; RV32I-NEXT: mv t4, t5
; RV32I-NEXT: .LBB11_8:
; RV32I-NEXT: sltu t5, a3, a5
; RV32I-NEXT: mv t6, t5
; RV32I-NEXT: beq a4, a1, .LBB11_10
; RV32I-NEXT: # %bb.9:
; RV32I-NEXT: sltu t6, a4, a1
; RV32I-NEXT: .LBB11_10:
; RV32I-NEXT: bnez t3, .LBB11_12
; RV32I-NEXT: # %bb.11:
; RV32I-NEXT: sub t0, t1, t0
; RV32I-NEXT: sub a6, a7, a6
; RV32I-NEXT: sub a3, a5, a3
; RV32I-NEXT: sub a1, a1, a4
; RV32I-NEXT: sub a4, t0, a2
; RV32I-NEXT: sltu a5, a6, t4
; RV32I-NEXT: sub a2, a1, t2
; RV32I-NEXT: sub a1, a4, a5
; RV32I-NEXT: sub a4, a6, t4
; RV32I-NEXT: j .LBB11_13
; RV32I-NEXT: .LBB11_12:
; RV32I-NEXT: sltu a2, a6, a7
; RV32I-NEXT: sub t0, t0, t1
; RV32I-NEXT: sub a6, a6, a7
; RV32I-NEXT: sub a3, a3, a5
; RV32I-NEXT: sub a4, a4, a1
; RV32I-NEXT: sub a1, t0, a2
; RV32I-NEXT: sltu a5, a6, t6
; RV32I-NEXT: sub a2, a4, t5
; RV32I-NEXT: sub a1, a1, a5
; RV32I-NEXT: sub a4, a6, t6
; RV32I-NEXT: .LBB11_13:
; RV32I-NEXT: sw a3, 0(a0)
; RV32I-NEXT: sw a2, 4(a0)
; RV32I-NEXT: sw a4, 8(a0)
; RV32I-NEXT: sw a1, 12(a0)
; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_ext_i128:
; RV64I: # %bb.0:
; RV64I-NEXT: sltu a4, a2, a0
; RV64I-NEXT: mv a5, a4
; RV64I-NEXT: beq a1, a3, .LBB11_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: slt a5, a3, a1
; RV64I-NEXT: .LBB11_2:
; RV64I-NEXT: bnez a5, .LBB11_4
; RV64I-NEXT: # %bb.3:
; RV64I-NEXT: sub a1, a3, a1
; RV64I-NEXT: sub a1, a1, a4
; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB11_4:
; RV64I-NEXT: sltu a4, a0, a2
; RV64I-NEXT: sub a1, a1, a3
; RV64I-NEXT: sub a1, a1, a4
; RV64I-NEXT: sub a0, a0, a2
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: abd_ext_i128:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: lw a3, 0(a1)
; RV32ZBB-NEXT: lw a4, 4(a1)
; RV32ZBB-NEXT: lw a6, 8(a1)
; RV32ZBB-NEXT: lw t0, 12(a1)
; RV32ZBB-NEXT: lw a7, 8(a2)
; RV32ZBB-NEXT: lw t1, 12(a2)
; RV32ZBB-NEXT: lw a5, 0(a2)
; RV32ZBB-NEXT: lw a1, 4(a2)
; RV32ZBB-NEXT: sltu a2, a7, a6
; RV32ZBB-NEXT: mv t4, a2
; RV32ZBB-NEXT: beq t0, t1, .LBB11_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: slt t4, t1, t0
; RV32ZBB-NEXT: .LBB11_2:
; RV32ZBB-NEXT: sltu t2, a5, a3
; RV32ZBB-NEXT: sltu t5, a1, a4
; RV32ZBB-NEXT: mv t3, t2
; RV32ZBB-NEXT: beq a4, a1, .LBB11_4
; RV32ZBB-NEXT: # %bb.3:
; RV32ZBB-NEXT: mv t3, t5
; RV32ZBB-NEXT: .LBB11_4:
; RV32ZBB-NEXT: addi sp, sp, -16
; RV32ZBB-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZBB-NEXT: xor t6, t0, t1
; RV32ZBB-NEXT: xor s0, a6, a7
; RV32ZBB-NEXT: or t6, s0, t6
; RV32ZBB-NEXT: beqz t6, .LBB11_6
; RV32ZBB-NEXT: # %bb.5:
; RV32ZBB-NEXT: mv t3, t4
; RV32ZBB-NEXT: .LBB11_6:
; RV32ZBB-NEXT: mv t4, t2
; RV32ZBB-NEXT: beq a1, a4, .LBB11_8
; RV32ZBB-NEXT: # %bb.7:
; RV32ZBB-NEXT: mv t4, t5
; RV32ZBB-NEXT: .LBB11_8:
; RV32ZBB-NEXT: sltu t5, a3, a5
; RV32ZBB-NEXT: mv t6, t5
; RV32ZBB-NEXT: beq a4, a1, .LBB11_10
; RV32ZBB-NEXT: # %bb.9:
; RV32ZBB-NEXT: sltu t6, a4, a1
; RV32ZBB-NEXT: .LBB11_10:
; RV32ZBB-NEXT: bnez t3, .LBB11_12
; RV32ZBB-NEXT: # %bb.11:
; RV32ZBB-NEXT: sub t0, t1, t0
; RV32ZBB-NEXT: sub a6, a7, a6
; RV32ZBB-NEXT: sub a3, a5, a3
; RV32ZBB-NEXT: sub a1, a1, a4
; RV32ZBB-NEXT: sub a4, t0, a2
; RV32ZBB-NEXT: sltu a5, a6, t4
; RV32ZBB-NEXT: sub a2, a1, t2
; RV32ZBB-NEXT: sub a1, a4, a5
; RV32ZBB-NEXT: sub a4, a6, t4
; RV32ZBB-NEXT: j .LBB11_13
; RV32ZBB-NEXT: .LBB11_12:
; RV32ZBB-NEXT: sltu a2, a6, a7
; RV32ZBB-NEXT: sub t0, t0, t1
; RV32ZBB-NEXT: sub a6, a6, a7
; RV32ZBB-NEXT: sub a3, a3, a5
; RV32ZBB-NEXT: sub a4, a4, a1
; RV32ZBB-NEXT: sub a1, t0, a2
; RV32ZBB-NEXT: sltu a5, a6, t6
; RV32ZBB-NEXT: sub a2, a4, t5
; RV32ZBB-NEXT: sub a1, a1, a5
; RV32ZBB-NEXT: sub a4, a6, t6
; RV32ZBB-NEXT: .LBB11_13:
; RV32ZBB-NEXT: sw a3, 0(a0)
; RV32ZBB-NEXT: sw a2, 4(a0)
; RV32ZBB-NEXT: sw a4, 8(a0)
; RV32ZBB-NEXT: sw a1, 12(a0)
; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZBB-NEXT: addi sp, sp, 16
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: abd_ext_i128:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: sltu a4, a2, a0
; RV64ZBB-NEXT: mv a5, a4
; RV64ZBB-NEXT: beq a1, a3, .LBB11_2
; RV64ZBB-NEXT: # %bb.1:
; RV64ZBB-NEXT: slt a5, a3, a1
; RV64ZBB-NEXT: .LBB11_2:
; RV64ZBB-NEXT: bnez a5, .LBB11_4
; RV64ZBB-NEXT: # %bb.3:
; RV64ZBB-NEXT: sub a1, a3, a1
; RV64ZBB-NEXT: sub a1, a1, a4
; RV64ZBB-NEXT: sub a0, a2, a0
; RV64ZBB-NEXT: ret
; RV64ZBB-NEXT: .LBB11_4:
; RV64ZBB-NEXT: sltu a4, a0, a2
; RV64ZBB-NEXT: sub a1, a1, a3
; RV64ZBB-NEXT: sub a1, a1, a4
; RV64ZBB-NEXT: sub a0, a0, a2
; RV64ZBB-NEXT: ret
  %aext = sext i128 %a to i256
  %bext = sext i128 %b to i256
  %sub = sub i256 %aext, %bext
  %abs = call i256 @llvm.abs.i256(i256 %sub, i1 false)
  %trunc = trunc i256 %abs to i128
  ret i128 %trunc
}

define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: abd_ext_i128_undef:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a3, 0(a1)
; RV32I-NEXT: lw a4, 4(a1)
; RV32I-NEXT: lw a6, 8(a1)
; RV32I-NEXT: lw t0, 12(a1)
; RV32I-NEXT: lw a7, 8(a2)
; RV32I-NEXT: lw t1, 12(a2)
; RV32I-NEXT: lw a5, 0(a2)
; RV32I-NEXT: lw a1, 4(a2)
; RV32I-NEXT: sltu a2, a7, a6
; RV32I-NEXT: mv t4, a2
; RV32I-NEXT: beq t0, t1, .LBB12_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: slt t4, t1, t0
; RV32I-NEXT: .LBB12_2:
; RV32I-NEXT: sltu t2, a5, a3
; RV32I-NEXT: sltu t5, a1, a4
; RV32I-NEXT: mv t3, t2
; RV32I-NEXT: beq a4, a1, .LBB12_4
; RV32I-NEXT: # %bb.3:
; RV32I-NEXT: mv t3, t5
; RV32I-NEXT: .LBB12_4:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: xor t6, t0, t1
; RV32I-NEXT: xor s0, a6, a7
; RV32I-NEXT: or t6, s0, t6
; RV32I-NEXT: beqz t6, .LBB12_6
; RV32I-NEXT: # %bb.5:
; RV32I-NEXT: mv t3, t4
; RV32I-NEXT: .LBB12_6:
; RV32I-NEXT: mv t4, t2
; RV32I-NEXT: beq a1, a4, .LBB12_8
; RV32I-NEXT: # %bb.7:
; RV32I-NEXT: mv t4, t5
; RV32I-NEXT: .LBB12_8:
; RV32I-NEXT: sltu t5, a3, a5
; RV32I-NEXT: mv t6, t5
; RV32I-NEXT: beq a4, a1, .LBB12_10
; RV32I-NEXT: # %bb.9:
; RV32I-NEXT: sltu t6, a4, a1
; RV32I-NEXT: .LBB12_10:
; RV32I-NEXT: bnez t3, .LBB12_12
; RV32I-NEXT: # %bb.11:
; RV32I-NEXT: sub t0, t1, t0
; RV32I-NEXT: sub a6, a7, a6
; RV32I-NEXT: sub a3, a5, a3
; RV32I-NEXT: sub a1, a1, a4
; RV32I-NEXT: sub a4, t0, a2
; RV32I-NEXT: sltu a5, a6, t4
; RV32I-NEXT: sub a2, a1, t2
; RV32I-NEXT: sub a1, a4, a5
; RV32I-NEXT: sub a4, a6, t4
; RV32I-NEXT: j .LBB12_13
; RV32I-NEXT: .LBB12_12:
; RV32I-NEXT: sltu a2, a6, a7
; RV32I-NEXT: sub t0, t0, t1
; RV32I-NEXT: sub a6, a6, a7
; RV32I-NEXT: sub a3, a3, a5
; RV32I-NEXT: sub a4, a4, a1
; RV32I-NEXT: sub a1, t0, a2
; RV32I-NEXT: sltu a5, a6, t6
; RV32I-NEXT: sub a2, a4, t5
; RV32I-NEXT: sub a1, a1, a5
; RV32I-NEXT: sub a4, a6, t6
; RV32I-NEXT: .LBB12_13:
; RV32I-NEXT: sw a3, 0(a0)
; RV32I-NEXT: sw a2, 4(a0)
; RV32I-NEXT: sw a4, 8(a0)
; RV32I-NEXT: sw a1, 12(a0)
; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_ext_i128_undef:
; RV64I: # %bb.0:
; RV64I-NEXT: sltu a4, a2, a0
; RV64I-NEXT: mv a5, a4
; RV64I-NEXT: beq a1, a3, .LBB12_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: slt a5, a3, a1
; RV64I-NEXT: .LBB12_2:
; RV64I-NEXT: bnez a5, .LBB12_4
; RV64I-NEXT: # %bb.3:
; RV64I-NEXT: sub a1, a3, a1
; RV64I-NEXT: sub a1, a1, a4
; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB12_4:
; RV64I-NEXT: sltu a4, a0, a2
; RV64I-NEXT: sub a1, a1, a3
; RV64I-NEXT: sub a1, a1, a4
; RV64I-NEXT: sub a0, a0, a2
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: abd_ext_i128_undef:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: lw a3, 0(a1)
; RV32ZBB-NEXT: lw a4, 4(a1)
; RV32ZBB-NEXT: lw a6, 8(a1)
; RV32ZBB-NEXT: lw t0, 12(a1)
; RV32ZBB-NEXT: lw a7, 8(a2)
; RV32ZBB-NEXT: lw t1, 12(a2)
; RV32ZBB-NEXT: lw a5, 0(a2)
; RV32ZBB-NEXT: lw a1, 4(a2)
; RV32ZBB-NEXT: sltu a2, a7, a6
; RV32ZBB-NEXT: mv t4, a2
; RV32ZBB-NEXT: beq t0, t1, .LBB12_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: slt t4, t1, t0
; RV32ZBB-NEXT: .LBB12_2:
; RV32ZBB-NEXT: sltu t2, a5, a3
; RV32ZBB-NEXT: sltu t5, a1, a4
; RV32ZBB-NEXT: mv t3, t2
; RV32ZBB-NEXT: beq a4, a1, .LBB12_4
; RV32ZBB-NEXT: # %bb.3:
; RV32ZBB-NEXT: mv t3, t5
; RV32ZBB-NEXT: .LBB12_4:
; RV32ZBB-NEXT: addi sp, sp, -16
; RV32ZBB-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZBB-NEXT: xor t6, t0, t1
; RV32ZBB-NEXT: xor s0, a6, a7
; RV32ZBB-NEXT: or t6, s0, t6
; RV32ZBB-NEXT: beqz t6, .LBB12_6
; RV32ZBB-NEXT: # %bb.5:
; RV32ZBB-NEXT: mv t3, t4
; RV32ZBB-NEXT: .LBB12_6:
; RV32ZBB-NEXT: mv t4, t2
; RV32ZBB-NEXT: beq a1, a4, .LBB12_8
; RV32ZBB-NEXT: # %bb.7:
; RV32ZBB-NEXT: mv t4, t5
; RV32ZBB-NEXT: .LBB12_8:
; RV32ZBB-NEXT: sltu t5, a3, a5
; RV32ZBB-NEXT: mv t6, t5
; RV32ZBB-NEXT: beq a4, a1, .LBB12_10
; RV32ZBB-NEXT: # %bb.9:
; RV32ZBB-NEXT: sltu t6, a4, a1
; RV32ZBB-NEXT: .LBB12_10:
; RV32ZBB-NEXT: bnez t3, .LBB12_12
; RV32ZBB-NEXT: # %bb.11:
; RV32ZBB-NEXT: sub t0, t1, t0
; RV32ZBB-NEXT: sub a6, a7, a6
; RV32ZBB-NEXT: sub a3, a5, a3
; RV32ZBB-NEXT: sub a1, a1, a4
; RV32ZBB-NEXT: sub a4, t0, a2
; RV32ZBB-NEXT: sltu a5, a6, t4
; RV32ZBB-NEXT: sub a2, a1, t2
; RV32ZBB-NEXT: sub a1, a4, a5
; RV32ZBB-NEXT: sub a4, a6, t4
; RV32ZBB-NEXT: j .LBB12_13
; RV32ZBB-NEXT: .LBB12_12:
; RV32ZBB-NEXT: sltu a2, a6, a7
; RV32ZBB-NEXT: sub t0, t0, t1
; RV32ZBB-NEXT: sub a6, a6, a7
; RV32ZBB-NEXT: sub a3, a3, a5
; RV32ZBB-NEXT: sub a4, a4, a1
; RV32ZBB-NEXT: sub a1, t0, a2
; RV32ZBB-NEXT: sltu a5, a6, t6
; RV32ZBB-NEXT: sub a2, a4, t5
; RV32ZBB-NEXT: sub a1, a1, a5
; RV32ZBB-NEXT: sub a4, a6, t6
; RV32ZBB-NEXT: .LBB12_13:
; RV32ZBB-NEXT: sw a3, 0(a0)
; RV32ZBB-NEXT: sw a2, 4(a0)
; RV32ZBB-NEXT: sw a4, 8(a0)
; RV32ZBB-NEXT: sw a1, 12(a0)
; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZBB-NEXT: addi sp, sp, 16
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: abd_ext_i128_undef:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: sltu a4, a2, a0
; RV64ZBB-NEXT: mv a5, a4
; RV64ZBB-NEXT: beq a1, a3, .LBB12_2
; RV64ZBB-NEXT: # %bb.1:
; RV64ZBB-NEXT: slt a5, a3, a1
; RV64ZBB-NEXT: .LBB12_2:
; RV64ZBB-NEXT: bnez a5, .LBB12_4
; RV64ZBB-NEXT: # %bb.3:
; RV64ZBB-NEXT: sub a1, a3, a1
; RV64ZBB-NEXT: sub a1, a1, a4
; RV64ZBB-NEXT: sub a0, a2, a0
; RV64ZBB-NEXT: ret
; RV64ZBB-NEXT: .LBB12_4:
; RV64ZBB-NEXT: sltu a4, a0, a2
; RV64ZBB-NEXT: sub a1, a1, a3
; RV64ZBB-NEXT: sub a1, a1, a4
; RV64ZBB-NEXT: sub a0, a0, a2
; RV64ZBB-NEXT: ret
  %aext = sext i128 %a to i256
  %bext = sext i128 %b to i256
  %sub = sub i256 %aext, %bext
  %abs = call i256 @llvm.abs.i256(i256 %sub, i1 true)
  %trunc = trunc i256 %abs to i128
  ret i128 %trunc
}

;
; sub(smax(a,b),smin(a,b)) -> abds(a,b)
;
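; Illustrative note: smax(a,b) >= smin(a,b) always holds, so the subtract
; below never wraps and equals |a-b| exactly,
; e.g. smax(-3,5) - smin(-3,5) = 5 - (-3) = 8.
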
define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
; RV32I-LABEL: abd_minmax_i8:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a1, a1, 24
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: srai a1, a1, 24
; RV32I-NEXT: srai a0, a0, 24
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_minmax_i8:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a1, 56
; RV64I-NEXT: slli a0, a0, 56
; RV64I-NEXT: srai a1, a1, 56
; RV64I-NEXT: srai a0, a0, 56
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: srai a1, a0, 63
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: ret
;
; ZBB-LABEL: abd_minmax_i8:
; ZBB: # %bb.0:
; ZBB-NEXT: sext.b a1, a1
; ZBB-NEXT: sext.b a0, a0
; ZBB-NEXT: min a2, a0, a1
; ZBB-NEXT: max a0, a0, a1
; ZBB-NEXT: sub a0, a0, a2
; ZBB-NEXT: ret
  %min = call i8 @llvm.smin.i8(i8 %a, i8 %b)
  %max = call i8 @llvm.smax.i8(i8 %a, i8 %b)
  %sub = sub i8 %max, %min
  ret i8 %sub
}

define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
; RV32I-LABEL: abd_minmax_i16:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a1, a1, 16
; RV32I-NEXT: slli a0, a0, 16
; RV32I-NEXT: srai a1, a1, 16
; RV32I-NEXT: srai a0, a0, 16
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_minmax_i16:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a1, 48
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srai a1, a1, 48
; RV64I-NEXT: srai a0, a0, 48
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: srai a1, a0, 63
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: ret
;
; ZBB-LABEL: abd_minmax_i16:
; ZBB: # %bb.0:
; ZBB-NEXT: sext.h a1, a1
; ZBB-NEXT: sext.h a0, a0
; ZBB-NEXT: min a2, a0, a1
; ZBB-NEXT: max a0, a0, a1
; ZBB-NEXT: sub a0, a0, a2
; ZBB-NEXT: ret
  %min = call i16 @llvm.smin.i16(i16 %a, i16 %b)
  %max = call i16 @llvm.smax.i16(i16 %a, i16 %b)
  %sub = sub i16 %max, %min
  ret i16 %sub
}

define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
; RV32I-LABEL: abd_minmax_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: blt a1, a0, .LBB15_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sub a0, a1, a0
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB15_2:
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_minmax_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: sext.w a1, a1
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: srai a1, a0, 63
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: abd_minmax_i32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: min a2, a0, a1
; RV32ZBB-NEXT: max a0, a0, a1
; RV32ZBB-NEXT: sub a0, a0, a2
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: abd_minmax_i32:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: sext.w a1, a1
; RV64ZBB-NEXT: sext.w a0, a0
; RV64ZBB-NEXT: min a2, a0, a1
; RV64ZBB-NEXT: max a0, a0, a1
; RV64ZBB-NEXT: sub a0, a0, a2
; RV64ZBB-NEXT: ret
  %min = call i32 @llvm.smin.i32(i32 %a, i32 %b)
  %max = call i32 @llvm.smax.i32(i32 %a, i32 %b)
  %sub = sub i32 %max, %min
  ret i32 %sub
}

define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
; RV32I-LABEL: abd_minmax_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: sltu a4, a2, a0
; RV32I-NEXT: mv a5, a4
; RV32I-NEXT: beq a1, a3, .LBB16_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: slt a5, a3, a1
; RV32I-NEXT: .LBB16_2:
; RV32I-NEXT: bnez a5, .LBB16_4
; RV32I-NEXT: # %bb.3:
; RV32I-NEXT: sub a1, a3, a1
; RV32I-NEXT: sub a1, a1, a4
; RV32I-NEXT: sub a0, a2, a0
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB16_4:
; RV32I-NEXT: sltu a4, a0, a2
; RV32I-NEXT: sub a1, a1, a3
; RV32I-NEXT: sub a1, a1, a4
; RV32I-NEXT: sub a0, a0, a2
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_minmax_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: blt a1, a0, .LBB16_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: sub a0, a1, a0
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB16_2:
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: abd_minmax_i64:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: sltu a4, a2, a0
; RV32ZBB-NEXT: mv a5, a4
; RV32ZBB-NEXT: beq a1, a3, .LBB16_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: slt a5, a3, a1
; RV32ZBB-NEXT: .LBB16_2:
; RV32ZBB-NEXT: bnez a5, .LBB16_4
; RV32ZBB-NEXT: # %bb.3:
; RV32ZBB-NEXT: sub a1, a3, a1
; RV32ZBB-NEXT: sub a1, a1, a4
; RV32ZBB-NEXT: sub a0, a2, a0
; RV32ZBB-NEXT: ret
; RV32ZBB-NEXT: .LBB16_4:
; RV32ZBB-NEXT: sltu a4, a0, a2
; RV32ZBB-NEXT: sub a1, a1, a3
; RV32ZBB-NEXT: sub a1, a1, a4
; RV32ZBB-NEXT: sub a0, a0, a2
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: abd_minmax_i64:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: min a2, a0, a1
; RV64ZBB-NEXT: max a0, a0, a1
; RV64ZBB-NEXT: sub a0, a0, a2
; RV64ZBB-NEXT: ret
  %min = call i64 @llvm.smin.i64(i64 %a, i64 %b)
  %max = call i64 @llvm.smax.i64(i64 %a, i64 %b)
  %sub = sub i64 %max, %min
  ret i64 %sub
}

define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: abd_minmax_i128:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a3, 0(a1)
; RV32I-NEXT: lw a4, 4(a1)
; RV32I-NEXT: lw a6, 8(a1)
; RV32I-NEXT: lw t0, 12(a1)
; RV32I-NEXT: lw a7, 8(a2)
; RV32I-NEXT: lw t1, 12(a2)
; RV32I-NEXT: lw a5, 0(a2)
; RV32I-NEXT: lw a1, 4(a2)
; RV32I-NEXT: sltu a2, a7, a6
; RV32I-NEXT: mv t4, a2
; RV32I-NEXT: beq t0, t1, .LBB17_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: slt t4, t1, t0
; RV32I-NEXT: .LBB17_2:
; RV32I-NEXT: sltu t2, a5, a3
; RV32I-NEXT: sltu t5, a1, a4
; RV32I-NEXT: mv t3, t2
; RV32I-NEXT: beq a4, a1, .LBB17_4
; RV32I-NEXT: # %bb.3:
; RV32I-NEXT: mv t3, t5
; RV32I-NEXT: .LBB17_4:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: xor t6, t0, t1
; RV32I-NEXT: xor s0, a6, a7
; RV32I-NEXT: or t6, s0, t6
; RV32I-NEXT: beqz t6, .LBB17_6
; RV32I-NEXT: # %bb.5:
; RV32I-NEXT: mv t3, t4
; RV32I-NEXT: .LBB17_6:
; RV32I-NEXT: mv t4, t2
; RV32I-NEXT: beq a1, a4, .LBB17_8
; RV32I-NEXT: # %bb.7:
; RV32I-NEXT: mv t4, t5
; RV32I-NEXT: .LBB17_8:
; RV32I-NEXT: sltu t5, a3, a5
; RV32I-NEXT: mv t6, t5
; RV32I-NEXT: beq a4, a1, .LBB17_10
; RV32I-NEXT: # %bb.9:
; RV32I-NEXT: sltu t6, a4, a1
; RV32I-NEXT: .LBB17_10:
; RV32I-NEXT: bnez t3, .LBB17_12
; RV32I-NEXT: # %bb.11:
; RV32I-NEXT: sub t0, t1, t0
; RV32I-NEXT: sub a6, a7, a6
; RV32I-NEXT: sub a3, a5, a3
; RV32I-NEXT: sub a1, a1, a4
; RV32I-NEXT: sub a4, t0, a2
; RV32I-NEXT: sltu a5, a6, t4
; RV32I-NEXT: sub a2, a1, t2
; RV32I-NEXT: sub a1, a4, a5
; RV32I-NEXT: sub a4, a6, t4
; RV32I-NEXT: j .LBB17_13
; RV32I-NEXT: .LBB17_12:
; RV32I-NEXT: sltu a2, a6, a7
; RV32I-NEXT: sub t0, t0, t1
; RV32I-NEXT: sub a6, a6, a7
; RV32I-NEXT: sub a3, a3, a5
; RV32I-NEXT: sub a4, a4, a1
; RV32I-NEXT: sub a1, t0, a2
; RV32I-NEXT: sltu a5, a6, t6
; RV32I-NEXT: sub a2, a4, t5
; RV32I-NEXT: sub a1, a1, a5
; RV32I-NEXT: sub a4, a6, t6
; RV32I-NEXT: .LBB17_13:
; RV32I-NEXT: sw a3, 0(a0)
; RV32I-NEXT: sw a2, 4(a0)
; RV32I-NEXT: sw a4, 8(a0)
; RV32I-NEXT: sw a1, 12(a0)
; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_minmax_i128:
; RV64I: # %bb.0:
; RV64I-NEXT: sltu a4, a2, a0
; RV64I-NEXT: mv a5, a4
; RV64I-NEXT: beq a1, a3, .LBB17_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: slt a5, a3, a1
; RV64I-NEXT: .LBB17_2:
; RV64I-NEXT: bnez a5, .LBB17_4
; RV64I-NEXT: # %bb.3:
; RV64I-NEXT: sub a1, a3, a1
; RV64I-NEXT: sub a1, a1, a4
; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB17_4:
; RV64I-NEXT: sltu a4, a0, a2
; RV64I-NEXT: sub a1, a1, a3
; RV64I-NEXT: sub a1, a1, a4
; RV64I-NEXT: sub a0, a0, a2
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: abd_minmax_i128:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: lw a3, 0(a1)
; RV32ZBB-NEXT: lw a4, 4(a1)
; RV32ZBB-NEXT: lw a6, 8(a1)
; RV32ZBB-NEXT: lw t0, 12(a1)
; RV32ZBB-NEXT: lw a7, 8(a2)
; RV32ZBB-NEXT: lw t1, 12(a2)
; RV32ZBB-NEXT: lw a5, 0(a2)
; RV32ZBB-NEXT: lw a1, 4(a2)
; RV32ZBB-NEXT: sltu a2, a7, a6
; RV32ZBB-NEXT: mv t4, a2
; RV32ZBB-NEXT: beq t0, t1, .LBB17_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: slt t4, t1, t0
; RV32ZBB-NEXT: .LBB17_2:
; RV32ZBB-NEXT: sltu t2, a5, a3
; RV32ZBB-NEXT: sltu t5, a1, a4
; RV32ZBB-NEXT: mv t3, t2
; RV32ZBB-NEXT: beq a4, a1, .LBB17_4
; RV32ZBB-NEXT: # %bb.3:
; RV32ZBB-NEXT: mv t3, t5
; RV32ZBB-NEXT: .LBB17_4:
; RV32ZBB-NEXT: addi sp, sp, -16
; RV32ZBB-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZBB-NEXT: xor t6, t0, t1
; RV32ZBB-NEXT: xor s0, a6, a7
; RV32ZBB-NEXT: or t6, s0, t6
; RV32ZBB-NEXT: beqz t6, .LBB17_6
; RV32ZBB-NEXT: # %bb.5:
; RV32ZBB-NEXT: mv t3, t4
; RV32ZBB-NEXT: .LBB17_6:
; RV32ZBB-NEXT: mv t4, t2
; RV32ZBB-NEXT: beq a1, a4, .LBB17_8
; RV32ZBB-NEXT: # %bb.7:
; RV32ZBB-NEXT: mv t4, t5
; RV32ZBB-NEXT: .LBB17_8:
; RV32ZBB-NEXT: sltu t5, a3, a5
; RV32ZBB-NEXT: mv t6, t5
; RV32ZBB-NEXT: beq a4, a1, .LBB17_10
; RV32ZBB-NEXT: # %bb.9:
; RV32ZBB-NEXT: sltu t6, a4, a1
; RV32ZBB-NEXT: .LBB17_10:
; RV32ZBB-NEXT: bnez t3, .LBB17_12
; RV32ZBB-NEXT: # %bb.11:
; RV32ZBB-NEXT: sub t0, t1, t0
; RV32ZBB-NEXT: sub a6, a7, a6
; RV32ZBB-NEXT: sub a3, a5, a3
; RV32ZBB-NEXT: sub a1, a1, a4
; RV32ZBB-NEXT: sub a4, t0, a2
; RV32ZBB-NEXT: sltu a5, a6, t4
; RV32ZBB-NEXT: sub a2, a1, t2
; RV32ZBB-NEXT: sub a1, a4, a5
; RV32ZBB-NEXT: sub a4, a6, t4
; RV32ZBB-NEXT: j .LBB17_13
; RV32ZBB-NEXT: .LBB17_12:
; RV32ZBB-NEXT: sltu a2, a6, a7
; RV32ZBB-NEXT: sub t0, t0, t1
; RV32ZBB-NEXT: sub a6, a6, a7
; RV32ZBB-NEXT: sub a3, a3, a5
; RV32ZBB-NEXT: sub a4, a4, a1
; RV32ZBB-NEXT: sub a1, t0, a2
; RV32ZBB-NEXT: sltu a5, a6, t6
; RV32ZBB-NEXT: sub a2, a4, t5
; RV32ZBB-NEXT: sub a1, a1, a5
; RV32ZBB-NEXT: sub a4, a6, t6
; RV32ZBB-NEXT: .LBB17_13:
; RV32ZBB-NEXT: sw a3, 0(a0)
; RV32ZBB-NEXT: sw a2, 4(a0)
; RV32ZBB-NEXT: sw a4, 8(a0)
; RV32ZBB-NEXT: sw a1, 12(a0)
; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZBB-NEXT: addi sp, sp, 16
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: abd_minmax_i128:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: sltu a4, a2, a0
; RV64ZBB-NEXT: mv a5, a4
; RV64ZBB-NEXT: beq a1, a3, .LBB17_2
; RV64ZBB-NEXT: # %bb.1:
; RV64ZBB-NEXT: slt a5, a3, a1
; RV64ZBB-NEXT: .LBB17_2:
; RV64ZBB-NEXT: bnez a5, .LBB17_4
; RV64ZBB-NEXT: # %bb.3:
; RV64ZBB-NEXT: sub a1, a3, a1
; RV64ZBB-NEXT: sub a1, a1, a4
; RV64ZBB-NEXT: sub a0, a2, a0
; RV64ZBB-NEXT: ret
; RV64ZBB-NEXT: .LBB17_4:
; RV64ZBB-NEXT: sltu a4, a0, a2
; RV64ZBB-NEXT: sub a1, a1, a3
; RV64ZBB-NEXT: sub a1, a1, a4
; RV64ZBB-NEXT: sub a0, a0, a2
; RV64ZBB-NEXT: ret
  %min = call i128 @llvm.smin.i128(i128 %a, i128 %b)
  %max = call i128 @llvm.smax.i128(i128 %a, i128 %b)
  %sub = sub i128 %max, %min
  ret i128 %sub
}

;
; select(icmp(a,b),sub(a,b),sub(b,a)) -> abds(a,b)
;
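; Illustrative note: whether the compare is sgt, sge or slt, selecting
; between a-b and b-a by the comparison of a and b yields the (wrapped)
; absolute difference, so all the forms below canonicalize to abds.
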
define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
; RV32I-LABEL: abd_cmp_i8:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a1, a1, 24
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: srai a1, a1, 24
; RV32I-NEXT: srai a0, a0, 24
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_cmp_i8:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a1, 56
; RV64I-NEXT: slli a0, a0, 56
; RV64I-NEXT: srai a1, a1, 56
; RV64I-NEXT: srai a0, a0, 56
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: srai a1, a0, 63
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: ret
;
; ZBB-LABEL: abd_cmp_i8:
; ZBB: # %bb.0:
; ZBB-NEXT: sext.b a1, a1
; ZBB-NEXT: sext.b a0, a0
; ZBB-NEXT: min a2, a0, a1
; ZBB-NEXT: max a0, a0, a1
; ZBB-NEXT: sub a0, a0, a2
; ZBB-NEXT: ret
  %cmp = icmp sgt i8 %a, %b
  %ab = sub i8 %a, %b
  %ba = sub i8 %b, %a
  %sel = select i1 %cmp, i8 %ab, i8 %ba
  ret i8 %sel
}

define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
; RV32I-LABEL: abd_cmp_i16:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a1, a1, 16
; RV32I-NEXT: slli a0, a0, 16
; RV32I-NEXT: srai a1, a1, 16
; RV32I-NEXT: srai a0, a0, 16
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_cmp_i16:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a1, 48
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srai a1, a1, 48
; RV64I-NEXT: srai a0, a0, 48
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: srai a1, a0, 63
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: ret
;
; ZBB-LABEL: abd_cmp_i16:
; ZBB: # %bb.0:
; ZBB-NEXT: sext.h a1, a1
; ZBB-NEXT: sext.h a0, a0
; ZBB-NEXT: min a2, a0, a1
; ZBB-NEXT: max a0, a0, a1
; ZBB-NEXT: sub a0, a0, a2
; ZBB-NEXT: ret
  %cmp = icmp sge i16 %a, %b
  %ab = sub i16 %a, %b
  %ba = sub i16 %b, %a
  %sel = select i1 %cmp, i16 %ab, i16 %ba
  ret i16 %sel
}

define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
; RV32I-LABEL: abd_cmp_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: blt a1, a0, .LBB20_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sub a0, a1, a0
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB20_2:
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_cmp_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: sext.w a1, a1
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: srai a1, a0, 63
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: abd_cmp_i32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: min a2, a0, a1
; RV32ZBB-NEXT: max a0, a0, a1
; RV32ZBB-NEXT: sub a0, a0, a2
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: abd_cmp_i32:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: sext.w a1, a1
; RV64ZBB-NEXT: sext.w a0, a0
; RV64ZBB-NEXT: min a2, a0, a1
; RV64ZBB-NEXT: max a0, a0, a1
; RV64ZBB-NEXT: sub a0, a0, a2
; RV64ZBB-NEXT: ret
  %cmp = icmp slt i32 %a, %b
  %ab = sub i32 %a, %b
  %ba = sub i32 %b, %a
  %sel = select i1 %cmp, i32 %ba, i32 %ab
  ret i32 %sel
}

define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
; RV32I-LABEL: abd_cmp_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: sltu a4, a2, a0
; RV32I-NEXT: mv a5, a4
; RV32I-NEXT: beq a1, a3, .LBB21_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: slt a5, a3, a1
; RV32I-NEXT: .LBB21_2:
; RV32I-NEXT: bnez a5, .LBB21_4
; RV32I-NEXT: # %bb.3:
; RV32I-NEXT: sub a1, a3, a1
; RV32I-NEXT: sub a1, a1, a4
; RV32I-NEXT: sub a0, a2, a0
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB21_4:
; RV32I-NEXT: sltu a4, a0, a2
; RV32I-NEXT: sub a1, a1, a3
; RV32I-NEXT: sub a1, a1, a4
; RV32I-NEXT: sub a0, a0, a2
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_cmp_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: blt a1, a0, .LBB21_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: sub a0, a1, a0
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB21_2:
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: abd_cmp_i64:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: sltu a4, a2, a0
; RV32ZBB-NEXT: mv a5, a4
; RV32ZBB-NEXT: beq a1, a3, .LBB21_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: slt a5, a3, a1
; RV32ZBB-NEXT: .LBB21_2:
; RV32ZBB-NEXT: bnez a5, .LBB21_4
; RV32ZBB-NEXT: # %bb.3:
; RV32ZBB-NEXT: sub a1, a3, a1
; RV32ZBB-NEXT: sub a1, a1, a4
; RV32ZBB-NEXT: sub a0, a2, a0
; RV32ZBB-NEXT: ret
; RV32ZBB-NEXT: .LBB21_4:
; RV32ZBB-NEXT: sltu a4, a0, a2
; RV32ZBB-NEXT: sub a1, a1, a3
; RV32ZBB-NEXT: sub a1, a1, a4
; RV32ZBB-NEXT: sub a0, a0, a2
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: abd_cmp_i64:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: min a2, a0, a1
; RV64ZBB-NEXT: max a0, a0, a1
; RV64ZBB-NEXT: sub a0, a0, a2
; RV64ZBB-NEXT: ret
  %cmp = icmp sge i64 %a, %b
  %ab = sub i64 %a, %b
  %ba = sub i64 %b, %a
  %sel = select i1 %cmp, i64 %ab, i64 %ba
  ret i64 %sel
}

define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: abd_cmp_i128:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a3, 0(a1)
; RV32I-NEXT: lw a4, 4(a1)
; RV32I-NEXT: lw a6, 8(a1)
; RV32I-NEXT: lw t0, 12(a1)
; RV32I-NEXT: lw a7, 8(a2)
; RV32I-NEXT: lw t1, 12(a2)
; RV32I-NEXT: lw a5, 0(a2)
; RV32I-NEXT: lw a1, 4(a2)
; RV32I-NEXT: sltu a2, a7, a6
; RV32I-NEXT: mv t4, a2
; RV32I-NEXT: beq t0, t1, .LBB22_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: slt t4, t1, t0
; RV32I-NEXT: .LBB22_2:
; RV32I-NEXT: sltu t2, a5, a3
; RV32I-NEXT: sltu t5, a1, a4
; RV32I-NEXT: mv t3, t2
; RV32I-NEXT: beq a4, a1, .LBB22_4
; RV32I-NEXT: # %bb.3:
; RV32I-NEXT: mv t3, t5
; RV32I-NEXT: .LBB22_4:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: xor t6, t0, t1
; RV32I-NEXT: xor s0, a6, a7
; RV32I-NEXT: or t6, s0, t6
; RV32I-NEXT: beqz t6, .LBB22_6
; RV32I-NEXT: # %bb.5:
; RV32I-NEXT: mv t3, t4
; RV32I-NEXT: .LBB22_6:
; RV32I-NEXT: mv t4, t2
; RV32I-NEXT: beq a1, a4, .LBB22_8
; RV32I-NEXT: # %bb.7:
; RV32I-NEXT: mv t4, t5
; RV32I-NEXT: .LBB22_8:
; RV32I-NEXT: sltu t5, a3, a5
; RV32I-NEXT: mv t6, t5
; RV32I-NEXT: beq a4, a1, .LBB22_10
; RV32I-NEXT: # %bb.9:
; RV32I-NEXT: sltu t6, a4, a1
; RV32I-NEXT: .LBB22_10:
; RV32I-NEXT: bnez t3, .LBB22_12
; RV32I-NEXT: # %bb.11:
; RV32I-NEXT: sub t0, t1, t0
; RV32I-NEXT: sub a6, a7, a6
; RV32I-NEXT: sub a3, a5, a3
; RV32I-NEXT: sub a1, a1, a4
; RV32I-NEXT: sub a4, t0, a2
; RV32I-NEXT: sltu a5, a6, t4
; RV32I-NEXT: sub a2, a1, t2
; RV32I-NEXT: sub a1, a4, a5
; RV32I-NEXT: sub a4, a6, t4
; RV32I-NEXT: j .LBB22_13
; RV32I-NEXT: .LBB22_12:
; RV32I-NEXT: sltu a2, a6, a7
; RV32I-NEXT: sub t0, t0, t1
; RV32I-NEXT: sub a6, a6, a7
; RV32I-NEXT: sub a3, a3, a5
; RV32I-NEXT: sub a4, a4, a1
; RV32I-NEXT: sub a1, t0, a2
; RV32I-NEXT: sltu a5, a6, t6
; RV32I-NEXT: sub a2, a4, t5
; RV32I-NEXT: sub a1, a1, a5
; RV32I-NEXT: sub a4, a6, t6
; RV32I-NEXT: .LBB22_13:
; RV32I-NEXT: sw a3, 0(a0)
; RV32I-NEXT: sw a2, 4(a0)
; RV32I-NEXT: sw a4, 8(a0)
; RV32I-NEXT: sw a1, 12(a0)
; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_cmp_i128:
; RV64I: # %bb.0:
; RV64I-NEXT: sltu a4, a2, a0
; RV64I-NEXT: mv a5, a4
; RV64I-NEXT: beq a1, a3, .LBB22_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: slt a5, a3, a1
; RV64I-NEXT: .LBB22_2:
; RV64I-NEXT: bnez a5, .LBB22_4
; RV64I-NEXT: # %bb.3:
; RV64I-NEXT: sub a1, a3, a1
; RV64I-NEXT: sub a1, a1, a4
; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB22_4:
; RV64I-NEXT: sltu a4, a0, a2
; RV64I-NEXT: sub a1, a1, a3
; RV64I-NEXT: sub a1, a1, a4
; RV64I-NEXT: sub a0, a0, a2
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: abd_cmp_i128:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: lw a3, 0(a1)
; RV32ZBB-NEXT: lw a4, 4(a1)
; RV32ZBB-NEXT: lw a6, 8(a1)
; RV32ZBB-NEXT: lw t0, 12(a1)
; RV32ZBB-NEXT: lw a7, 8(a2)
; RV32ZBB-NEXT: lw t1, 12(a2)
; RV32ZBB-NEXT: lw a5, 0(a2)
; RV32ZBB-NEXT: lw a1, 4(a2)
; RV32ZBB-NEXT: sltu a2, a7, a6
; RV32ZBB-NEXT: mv t4, a2
; RV32ZBB-NEXT: beq t0, t1, .LBB22_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: slt t4, t1, t0
; RV32ZBB-NEXT: .LBB22_2:
; RV32ZBB-NEXT: sltu t2, a5, a3
; RV32ZBB-NEXT: sltu t5, a1, a4
; RV32ZBB-NEXT: mv t3, t2
; RV32ZBB-NEXT: beq a4, a1, .LBB22_4
; RV32ZBB-NEXT: # %bb.3:
; RV32ZBB-NEXT: mv t3, t5
; RV32ZBB-NEXT: .LBB22_4:
; RV32ZBB-NEXT: addi sp, sp, -16
; RV32ZBB-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZBB-NEXT: xor t6, t0, t1
; RV32ZBB-NEXT: xor s0, a6, a7
; RV32ZBB-NEXT: or t6, s0, t6
; RV32ZBB-NEXT: beqz t6, .LBB22_6
; RV32ZBB-NEXT: # %bb.5:
; RV32ZBB-NEXT: mv t3, t4
; RV32ZBB-NEXT: .LBB22_6:
; RV32ZBB-NEXT: mv t4, t2
; RV32ZBB-NEXT: beq a1, a4, .LBB22_8
; RV32ZBB-NEXT: # %bb.7:
; RV32ZBB-NEXT: mv t4, t5
; RV32ZBB-NEXT: .LBB22_8:
; RV32ZBB-NEXT: sltu t5, a3, a5
; RV32ZBB-NEXT: mv t6, t5
; RV32ZBB-NEXT: beq a4, a1, .LBB22_10
; RV32ZBB-NEXT: # %bb.9:
; RV32ZBB-NEXT: sltu t6, a4, a1
; RV32ZBB-NEXT: .LBB22_10:
; RV32ZBB-NEXT: bnez t3, .LBB22_12
; RV32ZBB-NEXT: # %bb.11:
; RV32ZBB-NEXT: sub t0, t1, t0
; RV32ZBB-NEXT: sub a6, a7, a6
; RV32ZBB-NEXT: sub a3, a5, a3
; RV32ZBB-NEXT: sub a1, a1, a4
; RV32ZBB-NEXT: sub a4, t0, a2
; RV32ZBB-NEXT: sltu a5, a6, t4
; RV32ZBB-NEXT: sub a2, a1, t2
; RV32ZBB-NEXT: sub a1, a4, a5
; RV32ZBB-NEXT: sub a4, a6, t4
; RV32ZBB-NEXT: j .LBB22_13
; RV32ZBB-NEXT: .LBB22_12:
; RV32ZBB-NEXT: sltu a2, a6, a7
; RV32ZBB-NEXT: sub t0, t0, t1
; RV32ZBB-NEXT: sub a6, a6, a7
; RV32ZBB-NEXT: sub a3, a3, a5
; RV32ZBB-NEXT: sub a4, a4, a1
; RV32ZBB-NEXT: sub a1, t0, a2
; RV32ZBB-NEXT: sltu a5, a6, t6
; RV32ZBB-NEXT: sub a2, a4, t5
; RV32ZBB-NEXT: sub a1, a1, a5
; RV32ZBB-NEXT: sub a4, a6, t6
; RV32ZBB-NEXT: .LBB22_13:
; RV32ZBB-NEXT: sw a3, 0(a0)
; RV32ZBB-NEXT: sw a2, 4(a0)
; RV32ZBB-NEXT: sw a4, 8(a0)
; RV32ZBB-NEXT: sw a1, 12(a0)
; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZBB-NEXT: addi sp, sp, 16
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: abd_cmp_i128:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: sltu a4, a2, a0
; RV64ZBB-NEXT: mv a5, a4
; RV64ZBB-NEXT: beq a1, a3, .LBB22_2
; RV64ZBB-NEXT: # %bb.1:
; RV64ZBB-NEXT: slt a5, a3, a1
; RV64ZBB-NEXT: .LBB22_2:
; RV64ZBB-NEXT: bnez a5, .LBB22_4
; RV64ZBB-NEXT: # %bb.3:
; RV64ZBB-NEXT: sub a1, a3, a1
; RV64ZBB-NEXT: sub a1, a1, a4
; RV64ZBB-NEXT: sub a0, a2, a0
; RV64ZBB-NEXT: ret
; RV64ZBB-NEXT: .LBB22_4:
; RV64ZBB-NEXT: sltu a4, a0, a2
; RV64ZBB-NEXT: sub a1, a1, a3
; RV64ZBB-NEXT: sub a1, a1, a4
; RV64ZBB-NEXT: sub a0, a0, a2
; RV64ZBB-NEXT: ret
  %cmp = icmp sge i128 %a, %b
  %ab = sub i128 %a, %b
  %ba = sub i128 %b, %a
  %sel = select i1 %cmp, i128 %ab, i128 %ba
  ret i128 %sel
}

;
; abs(sub_nsw(x, y)) -> abds(a,b)
;
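; Illustrative note: the nsw flag guarantees a-b does not overflow, so
; abs(sub nsw) is exact and Zbb can use neg+max on the difference
; (re-sign-extended first for the sub-word types) instead of the
; xor/sub idiom.
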
1717 define i8 @abd_subnsw_i8(i8 %a, i8 %b) nounwind {
1718 ; RV32I-LABEL: abd_subnsw_i8:
1720 ; RV32I-NEXT: sub a0, a0, a1
1721 ; RV32I-NEXT: slli a1, a0, 24
1722 ; RV32I-NEXT: srai a1, a1, 31
1723 ; RV32I-NEXT: xor a0, a0, a1
1724 ; RV32I-NEXT: sub a0, a0, a1
1727 ; RV64I-LABEL: abd_subnsw_i8:
1729 ; RV64I-NEXT: sub a0, a0, a1
1730 ; RV64I-NEXT: slli a1, a0, 56
1731 ; RV64I-NEXT: srai a1, a1, 63
1732 ; RV64I-NEXT: xor a0, a0, a1
1733 ; RV64I-NEXT: sub a0, a0, a1
1736 ; RV32ZBB-LABEL: abd_subnsw_i8:
1738 ; RV32ZBB-NEXT: sub a0, a0, a1
1739 ; RV32ZBB-NEXT: sext.b a0, a0
1740 ; RV32ZBB-NEXT: neg a1, a0
1741 ; RV32ZBB-NEXT: max a0, a0, a1
1744 ; RV64ZBB-LABEL: abd_subnsw_i8:
1746 ; RV64ZBB-NEXT: subw a0, a0, a1
1747 ; RV64ZBB-NEXT: sext.b a0, a0
1748 ; RV64ZBB-NEXT: neg a1, a0
1749 ; RV64ZBB-NEXT: max a0, a0, a1
1751 %sub = sub nsw i8 %a, %b
1752 %abs = call i8 @llvm.abs.i8(i8 %sub, i1 false)
1756 define i8 @abd_subnsw_i8_undef(i8 %a, i8 %b) nounwind {
1757 ; RV32I-LABEL: abd_subnsw_i8_undef:
1759 ; RV32I-NEXT: sub a0, a0, a1
1760 ; RV32I-NEXT: slli a1, a0, 24
1761 ; RV32I-NEXT: srai a1, a1, 31
1762 ; RV32I-NEXT: xor a0, a0, a1
1763 ; RV32I-NEXT: sub a0, a0, a1
1766 ; RV64I-LABEL: abd_subnsw_i8_undef:
1768 ; RV64I-NEXT: sub a0, a0, a1
1769 ; RV64I-NEXT: slli a1, a0, 56
1770 ; RV64I-NEXT: srai a1, a1, 63
1771 ; RV64I-NEXT: xor a0, a0, a1
1772 ; RV64I-NEXT: sub a0, a0, a1
1775 ; RV32ZBB-LABEL: abd_subnsw_i8_undef:
1777 ; RV32ZBB-NEXT: sub a0, a0, a1
1778 ; RV32ZBB-NEXT: sext.b a0, a0
1779 ; RV32ZBB-NEXT: neg a1, a0
1780 ; RV32ZBB-NEXT: max a0, a0, a1
1783 ; RV64ZBB-LABEL: abd_subnsw_i8_undef:
1785 ; RV64ZBB-NEXT: subw a0, a0, a1
1786 ; RV64ZBB-NEXT: sext.b a0, a0
1787 ; RV64ZBB-NEXT: neg a1, a0
1788 ; RV64ZBB-NEXT: max a0, a0, a1
1790 %sub = sub nsw i8 %a, %b
1791 %abs = call i8 @llvm.abs.i8(i8 %sub, i1 true)
1795 define i16 @abd_subnsw_i16(i16 %a, i16 %b) nounwind {
1796 ; RV32I-LABEL: abd_subnsw_i16:
1798 ; RV32I-NEXT: sub a0, a0, a1
1799 ; RV32I-NEXT: slli a1, a0, 16
1800 ; RV32I-NEXT: srai a1, a1, 31
1801 ; RV32I-NEXT: xor a0, a0, a1
1802 ; RV32I-NEXT: sub a0, a0, a1
1805 ; RV64I-LABEL: abd_subnsw_i16:
1807 ; RV64I-NEXT: sub a0, a0, a1
1808 ; RV64I-NEXT: slli a1, a0, 48
1809 ; RV64I-NEXT: srai a1, a1, 63
1810 ; RV64I-NEXT: xor a0, a0, a1
1811 ; RV64I-NEXT: sub a0, a0, a1
1814 ; RV32ZBB-LABEL: abd_subnsw_i16:
1816 ; RV32ZBB-NEXT: sub a0, a0, a1
1817 ; RV32ZBB-NEXT: sext.h a0, a0
1818 ; RV32ZBB-NEXT: neg a1, a0
1819 ; RV32ZBB-NEXT: max a0, a0, a1
1822 ; RV64ZBB-LABEL: abd_subnsw_i16:
1824 ; RV64ZBB-NEXT: subw a0, a0, a1
1825 ; RV64ZBB-NEXT: sext.h a0, a0
1826 ; RV64ZBB-NEXT: neg a1, a0
1827 ; RV64ZBB-NEXT: max a0, a0, a1
1829 %sub = sub nsw i16 %a, %b
1830 %abs = call i16 @llvm.abs.i16(i16 %sub, i1 false)
1834 define i16 @abd_subnsw_i16_undef(i16 %a, i16 %b) nounwind {
1835 ; RV32I-LABEL: abd_subnsw_i16_undef:
1837 ; RV32I-NEXT: sub a0, a0, a1
1838 ; RV32I-NEXT: slli a1, a0, 16
1839 ; RV32I-NEXT: srai a1, a1, 31
1840 ; RV32I-NEXT: xor a0, a0, a1
1841 ; RV32I-NEXT: sub a0, a0, a1
1844 ; RV64I-LABEL: abd_subnsw_i16_undef:
1846 ; RV64I-NEXT: sub a0, a0, a1
1847 ; RV64I-NEXT: slli a1, a0, 48
1848 ; RV64I-NEXT: srai a1, a1, 63
1849 ; RV64I-NEXT: xor a0, a0, a1
1850 ; RV64I-NEXT: sub a0, a0, a1
1853 ; RV32ZBB-LABEL: abd_subnsw_i16_undef:
1855 ; RV32ZBB-NEXT: sub a0, a0, a1
1856 ; RV32ZBB-NEXT: sext.h a0, a0
1857 ; RV32ZBB-NEXT: neg a1, a0
1858 ; RV32ZBB-NEXT: max a0, a0, a1
1861 ; RV64ZBB-LABEL: abd_subnsw_i16_undef:
1863 ; RV64ZBB-NEXT: subw a0, a0, a1
1864 ; RV64ZBB-NEXT: sext.h a0, a0
1865 ; RV64ZBB-NEXT: neg a1, a0
1866 ; RV64ZBB-NEXT: max a0, a0, a1
1868 %sub = sub nsw i16 %a, %b
1869 %abs = call i16 @llvm.abs.i16(i16 %sub, i1 true)
1873 define i32 @abd_subnsw_i32(i32 %a, i32 %b) nounwind {
1874 ; RV32I-LABEL: abd_subnsw_i32:
1876 ; RV32I-NEXT: sub a0, a0, a1
1877 ; RV32I-NEXT: srai a1, a0, 31
1878 ; RV32I-NEXT: xor a0, a0, a1
1879 ; RV32I-NEXT: sub a0, a0, a1
1882 ; RV64I-LABEL: abd_subnsw_i32:
1884 ; RV64I-NEXT: subw a0, a0, a1
1885 ; RV64I-NEXT: sraiw a1, a0, 31
1886 ; RV64I-NEXT: xor a0, a0, a1
1887 ; RV64I-NEXT: subw a0, a0, a1
1890 ; RV32ZBB-LABEL: abd_subnsw_i32:
1892 ; RV32ZBB-NEXT: sub a0, a0, a1
1893 ; RV32ZBB-NEXT: neg a1, a0
1894 ; RV32ZBB-NEXT: max a0, a0, a1
1897 ; RV64ZBB-LABEL: abd_subnsw_i32:
1899 ; RV64ZBB-NEXT: subw a0, a0, a1
1900 ; RV64ZBB-NEXT: negw a1, a0
1901 ; RV64ZBB-NEXT: max a0, a0, a1
1903 %sub = sub nsw i32 %a, %b
1904 %abs = call i32 @llvm.abs.i32(i32 %sub, i1 false)
1908 define i32 @abd_subnsw_i32_undef(i32 %a, i32 %b) nounwind {
1909 ; RV32I-LABEL: abd_subnsw_i32_undef:
1911 ; RV32I-NEXT: sub a0, a0, a1
1912 ; RV32I-NEXT: srai a1, a0, 31
1913 ; RV32I-NEXT: xor a0, a0, a1
1914 ; RV32I-NEXT: sub a0, a0, a1
1917 ; RV64I-LABEL: abd_subnsw_i32_undef:
1919 ; RV64I-NEXT: subw a0, a0, a1
1920 ; RV64I-NEXT: sraiw a1, a0, 31
1921 ; RV64I-NEXT: xor a0, a0, a1
1922 ; RV64I-NEXT: subw a0, a0, a1
1925 ; RV32ZBB-LABEL: abd_subnsw_i32_undef:
1927 ; RV32ZBB-NEXT: sub a0, a0, a1
1928 ; RV32ZBB-NEXT: neg a1, a0
1929 ; RV32ZBB-NEXT: max a0, a0, a1
1932 ; RV64ZBB-LABEL: abd_subnsw_i32_undef:
1934 ; RV64ZBB-NEXT: subw a0, a0, a1
1935 ; RV64ZBB-NEXT: negw a1, a0
1936 ; RV64ZBB-NEXT: max a0, a0, a1
1938 %sub = sub nsw i32 %a, %b
1939 %abs = call i32 @llvm.abs.i32(i32 %sub, i1 true)
1943 define i64 @abd_subnsw_i64(i64 %a, i64 %b) nounwind {
1944 ; RV32I-LABEL: abd_subnsw_i64:
1946 ; RV32I-NEXT: sltu a4, a0, a2
1947 ; RV32I-NEXT: sub a1, a1, a3
1948 ; RV32I-NEXT: sub a1, a1, a4
1949 ; RV32I-NEXT: sub a0, a0, a2
1950 ; RV32I-NEXT: bgez a1, .LBB29_2
1951 ; RV32I-NEXT: # %bb.1:
1952 ; RV32I-NEXT: snez a2, a0
1953 ; RV32I-NEXT: add a1, a1, a2
1954 ; RV32I-NEXT: neg a1, a1
1955 ; RV32I-NEXT: neg a0, a0
1956 ; RV32I-NEXT: .LBB29_2:
1959 ; RV64I-LABEL: abd_subnsw_i64:
1961 ; RV64I-NEXT: sub a0, a0, a1
1962 ; RV64I-NEXT: srai a1, a0, 63
1963 ; RV64I-NEXT: xor a0, a0, a1
1964 ; RV64I-NEXT: sub a0, a0, a1
1967 ; RV32ZBB-LABEL: abd_subnsw_i64:
1969 ; RV32ZBB-NEXT: sltu a4, a0, a2
1970 ; RV32ZBB-NEXT: sub a1, a1, a3
1971 ; RV32ZBB-NEXT: sub a1, a1, a4
1972 ; RV32ZBB-NEXT: sub a0, a0, a2
1973 ; RV32ZBB-NEXT: bgez a1, .LBB29_2
1974 ; RV32ZBB-NEXT: # %bb.1:
1975 ; RV32ZBB-NEXT: snez a2, a0
1976 ; RV32ZBB-NEXT: add a1, a1, a2
1977 ; RV32ZBB-NEXT: neg a1, a1
1978 ; RV32ZBB-NEXT: neg a0, a0
1979 ; RV32ZBB-NEXT: .LBB29_2:
1982 ; RV64ZBB-LABEL: abd_subnsw_i64:
1984 ; RV64ZBB-NEXT: sub a0, a0, a1
1985 ; RV64ZBB-NEXT: neg a1, a0
1986 ; RV64ZBB-NEXT: max a0, a0, a1
1988 %sub = sub nsw i64 %a, %b
1989 %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)

define i64 @abd_subnsw_i64_undef(i64 %a, i64 %b) nounwind {
; RV32I-LABEL: abd_subnsw_i64_undef:
; RV32I-NEXT: sltu a4, a0, a2
; RV32I-NEXT: sub a1, a1, a3
; RV32I-NEXT: sub a1, a1, a4
; RV32I-NEXT: sub a0, a0, a2
; RV32I-NEXT: bgez a1, .LBB30_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: snez a2, a0
; RV32I-NEXT: add a1, a1, a2
; RV32I-NEXT: neg a1, a1
; RV32I-NEXT: neg a0, a0
; RV32I-NEXT: .LBB30_2:
; RV64I-LABEL: abd_subnsw_i64_undef:
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: srai a1, a0, 63
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: sub a0, a0, a1
; RV32ZBB-LABEL: abd_subnsw_i64_undef:
; RV32ZBB-NEXT: sltu a4, a0, a2
; RV32ZBB-NEXT: sub a1, a1, a3
; RV32ZBB-NEXT: sub a1, a1, a4
; RV32ZBB-NEXT: sub a0, a0, a2
; RV32ZBB-NEXT: bgez a1, .LBB30_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: snez a2, a0
; RV32ZBB-NEXT: add a1, a1, a2
; RV32ZBB-NEXT: neg a1, a1
; RV32ZBB-NEXT: neg a0, a0
; RV32ZBB-NEXT: .LBB30_2:
; RV64ZBB-LABEL: abd_subnsw_i64_undef:
; RV64ZBB-NEXT: sub a0, a0, a1
; RV64ZBB-NEXT: neg a1, a0
; RV64ZBB-NEXT: max a0, a0, a1
  %sub = sub nsw i64 %a, %b
  %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true)
  ret i64 %abs
}

define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: abd_subnsw_i128:
; RV32I-NEXT: lw a4, 0(a2)
; RV32I-NEXT: lw a3, 4(a2)
; RV32I-NEXT: lw a5, 8(a2)
; RV32I-NEXT: lw a6, 12(a2)
; RV32I-NEXT: lw a7, 8(a1)
; RV32I-NEXT: lw t0, 12(a1)
; RV32I-NEXT: lw a2, 0(a1)
; RV32I-NEXT: lw a1, 4(a1)
; RV32I-NEXT: sltu t1, a7, a5
; RV32I-NEXT: sub t0, t0, a6
; RV32I-NEXT: sltu a6, a2, a4
; RV32I-NEXT: sub t0, t0, t1
; RV32I-NEXT: mv t1, a6
; RV32I-NEXT: beq a1, a3, .LBB31_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sltu t1, a1, a3
; RV32I-NEXT: .LBB31_2:
; RV32I-NEXT: sub a5, a7, a5
; RV32I-NEXT: sub a3, a1, a3
; RV32I-NEXT: sltu a1, a5, t1
; RV32I-NEXT: sub a5, a5, t1
; RV32I-NEXT: sub a1, t0, a1
; RV32I-NEXT: sub a3, a3, a6
; RV32I-NEXT: sub a2, a2, a4
; RV32I-NEXT: bgez a1, .LBB31_4
; RV32I-NEXT: # %bb.3:
; RV32I-NEXT: snez a4, a3
; RV32I-NEXT: snez a6, a2
; RV32I-NEXT: neg a7, a5
; RV32I-NEXT: snez a5, a5
; RV32I-NEXT: neg a2, a2
; RV32I-NEXT: or a4, a6, a4
; RV32I-NEXT: add a1, a1, a5
; RV32I-NEXT: add a3, a3, a6
; RV32I-NEXT: sltu a6, a7, a4
; RV32I-NEXT: neg a1, a1
; RV32I-NEXT: sub a5, a7, a4
; RV32I-NEXT: sub a1, a1, a6
; RV32I-NEXT: neg a3, a3
; RV32I-NEXT: .LBB31_4:
; RV32I-NEXT: sw a2, 0(a0)
; RV32I-NEXT: sw a3, 4(a0)
; RV32I-NEXT: sw a5, 8(a0)
; RV32I-NEXT: sw a1, 12(a0)
; RV64I-LABEL: abd_subnsw_i128:
; RV64I-NEXT: sltu a4, a0, a2
; RV64I-NEXT: sub a1, a1, a3
; RV64I-NEXT: sub a1, a1, a4
; RV64I-NEXT: sub a0, a0, a2
; RV64I-NEXT: bgez a1, .LBB31_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: snez a2, a0
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: neg a1, a1
; RV64I-NEXT: neg a0, a0
; RV64I-NEXT: .LBB31_2:
; RV32ZBB-LABEL: abd_subnsw_i128:
; RV32ZBB-NEXT: lw a4, 0(a2)
; RV32ZBB-NEXT: lw a3, 4(a2)
; RV32ZBB-NEXT: lw a5, 8(a2)
; RV32ZBB-NEXT: lw a6, 12(a2)
; RV32ZBB-NEXT: lw a7, 8(a1)
; RV32ZBB-NEXT: lw t0, 12(a1)
; RV32ZBB-NEXT: lw a2, 0(a1)
; RV32ZBB-NEXT: lw a1, 4(a1)
; RV32ZBB-NEXT: sltu t1, a7, a5
; RV32ZBB-NEXT: sub t0, t0, a6
; RV32ZBB-NEXT: sltu a6, a2, a4
; RV32ZBB-NEXT: sub t0, t0, t1
; RV32ZBB-NEXT: mv t1, a6
; RV32ZBB-NEXT: beq a1, a3, .LBB31_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: sltu t1, a1, a3
; RV32ZBB-NEXT: .LBB31_2:
; RV32ZBB-NEXT: sub a5, a7, a5
; RV32ZBB-NEXT: sub a3, a1, a3
; RV32ZBB-NEXT: sltu a1, a5, t1
; RV32ZBB-NEXT: sub a5, a5, t1
; RV32ZBB-NEXT: sub a1, t0, a1
; RV32ZBB-NEXT: sub a3, a3, a6
; RV32ZBB-NEXT: sub a2, a2, a4
; RV32ZBB-NEXT: bgez a1, .LBB31_4
; RV32ZBB-NEXT: # %bb.3:
; RV32ZBB-NEXT: snez a4, a3
; RV32ZBB-NEXT: snez a6, a2
; RV32ZBB-NEXT: neg a7, a5
; RV32ZBB-NEXT: snez a5, a5
; RV32ZBB-NEXT: neg a2, a2
; RV32ZBB-NEXT: or a4, a6, a4
; RV32ZBB-NEXT: add a1, a1, a5
; RV32ZBB-NEXT: add a3, a3, a6
; RV32ZBB-NEXT: sltu a6, a7, a4
; RV32ZBB-NEXT: neg a1, a1
; RV32ZBB-NEXT: sub a5, a7, a4
; RV32ZBB-NEXT: sub a1, a1, a6
; RV32ZBB-NEXT: neg a3, a3
; RV32ZBB-NEXT: .LBB31_4:
; RV32ZBB-NEXT: sw a2, 0(a0)
; RV32ZBB-NEXT: sw a3, 4(a0)
; RV32ZBB-NEXT: sw a5, 8(a0)
; RV32ZBB-NEXT: sw a1, 12(a0)
; RV64ZBB-LABEL: abd_subnsw_i128:
; RV64ZBB-NEXT: sltu a4, a0, a2
; RV64ZBB-NEXT: sub a1, a1, a3
; RV64ZBB-NEXT: sub a1, a1, a4
; RV64ZBB-NEXT: sub a0, a0, a2
; RV64ZBB-NEXT: bgez a1, .LBB31_2
; RV64ZBB-NEXT: # %bb.1:
; RV64ZBB-NEXT: snez a2, a0
; RV64ZBB-NEXT: add a1, a1, a2
; RV64ZBB-NEXT: neg a1, a1
; RV64ZBB-NEXT: neg a0, a0
; RV64ZBB-NEXT: .LBB31_2:
  %sub = sub nsw i128 %a, %b
  %abs = call i128 @llvm.abs.i128(i128 %sub, i1 false)
  ret i128 %abs
}
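
; The i128 version is the same conditional-negate scheme widened: on RV64 the
; value is two 64-bit limbs, while RV32 chains the borrow through four 32-bit
; limbs before testing the sign of the top word with bgez.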

define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: abd_subnsw_i128_undef:
; RV32I-NEXT: lw a4, 0(a2)
; RV32I-NEXT: lw a3, 4(a2)
; RV32I-NEXT: lw a5, 8(a2)
; RV32I-NEXT: lw a6, 12(a2)
; RV32I-NEXT: lw a7, 8(a1)
; RV32I-NEXT: lw t0, 12(a1)
; RV32I-NEXT: lw a2, 0(a1)
; RV32I-NEXT: lw a1, 4(a1)
; RV32I-NEXT: sltu t1, a7, a5
; RV32I-NEXT: sub t0, t0, a6
; RV32I-NEXT: sltu a6, a2, a4
; RV32I-NEXT: sub t0, t0, t1
; RV32I-NEXT: mv t1, a6
; RV32I-NEXT: beq a1, a3, .LBB32_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sltu t1, a1, a3
; RV32I-NEXT: .LBB32_2:
; RV32I-NEXT: sub a5, a7, a5
; RV32I-NEXT: sub a3, a1, a3
; RV32I-NEXT: sltu a1, a5, t1
; RV32I-NEXT: sub a5, a5, t1
; RV32I-NEXT: sub a1, t0, a1
; RV32I-NEXT: sub a3, a3, a6
; RV32I-NEXT: sub a2, a2, a4
; RV32I-NEXT: bgez a1, .LBB32_4
; RV32I-NEXT: # %bb.3:
; RV32I-NEXT: snez a4, a3
; RV32I-NEXT: snez a6, a2
; RV32I-NEXT: neg a7, a5
; RV32I-NEXT: snez a5, a5
; RV32I-NEXT: neg a2, a2
; RV32I-NEXT: or a4, a6, a4
; RV32I-NEXT: add a1, a1, a5
; RV32I-NEXT: add a3, a3, a6
; RV32I-NEXT: sltu a6, a7, a4
; RV32I-NEXT: neg a1, a1
; RV32I-NEXT: sub a5, a7, a4
; RV32I-NEXT: sub a1, a1, a6
; RV32I-NEXT: neg a3, a3
; RV32I-NEXT: .LBB32_4:
; RV32I-NEXT: sw a2, 0(a0)
; RV32I-NEXT: sw a3, 4(a0)
; RV32I-NEXT: sw a5, 8(a0)
; RV32I-NEXT: sw a1, 12(a0)
; RV64I-LABEL: abd_subnsw_i128_undef:
; RV64I-NEXT: sltu a4, a0, a2
; RV64I-NEXT: sub a1, a1, a3
; RV64I-NEXT: sub a1, a1, a4
; RV64I-NEXT: sub a0, a0, a2
; RV64I-NEXT: bgez a1, .LBB32_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: snez a2, a0
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: neg a1, a1
; RV64I-NEXT: neg a0, a0
; RV64I-NEXT: .LBB32_2:
; RV32ZBB-LABEL: abd_subnsw_i128_undef:
; RV32ZBB-NEXT: lw a4, 0(a2)
; RV32ZBB-NEXT: lw a3, 4(a2)
; RV32ZBB-NEXT: lw a5, 8(a2)
; RV32ZBB-NEXT: lw a6, 12(a2)
; RV32ZBB-NEXT: lw a7, 8(a1)
; RV32ZBB-NEXT: lw t0, 12(a1)
; RV32ZBB-NEXT: lw a2, 0(a1)
; RV32ZBB-NEXT: lw a1, 4(a1)
; RV32ZBB-NEXT: sltu t1, a7, a5
; RV32ZBB-NEXT: sub t0, t0, a6
; RV32ZBB-NEXT: sltu a6, a2, a4
; RV32ZBB-NEXT: sub t0, t0, t1
; RV32ZBB-NEXT: mv t1, a6
; RV32ZBB-NEXT: beq a1, a3, .LBB32_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: sltu t1, a1, a3
; RV32ZBB-NEXT: .LBB32_2:
; RV32ZBB-NEXT: sub a5, a7, a5
; RV32ZBB-NEXT: sub a3, a1, a3
; RV32ZBB-NEXT: sltu a1, a5, t1
; RV32ZBB-NEXT: sub a5, a5, t1
; RV32ZBB-NEXT: sub a1, t0, a1
; RV32ZBB-NEXT: sub a3, a3, a6
; RV32ZBB-NEXT: sub a2, a2, a4
; RV32ZBB-NEXT: bgez a1, .LBB32_4
; RV32ZBB-NEXT: # %bb.3:
; RV32ZBB-NEXT: snez a4, a3
; RV32ZBB-NEXT: snez a6, a2
; RV32ZBB-NEXT: neg a7, a5
; RV32ZBB-NEXT: snez a5, a5
; RV32ZBB-NEXT: neg a2, a2
; RV32ZBB-NEXT: or a4, a6, a4
; RV32ZBB-NEXT: add a1, a1, a5
; RV32ZBB-NEXT: add a3, a3, a6
; RV32ZBB-NEXT: sltu a6, a7, a4
; RV32ZBB-NEXT: neg a1, a1
; RV32ZBB-NEXT: sub a5, a7, a4
; RV32ZBB-NEXT: sub a1, a1, a6
; RV32ZBB-NEXT: neg a3, a3
; RV32ZBB-NEXT: .LBB32_4:
; RV32ZBB-NEXT: sw a2, 0(a0)
; RV32ZBB-NEXT: sw a3, 4(a0)
; RV32ZBB-NEXT: sw a5, 8(a0)
; RV32ZBB-NEXT: sw a1, 12(a0)
; RV64ZBB-LABEL: abd_subnsw_i128_undef:
; RV64ZBB-NEXT: sltu a4, a0, a2
; RV64ZBB-NEXT: sub a1, a1, a3
; RV64ZBB-NEXT: sub a1, a1, a4
; RV64ZBB-NEXT: sub a0, a0, a2
; RV64ZBB-NEXT: bgez a1, .LBB32_2
; RV64ZBB-NEXT: # %bb.1:
; RV64ZBB-NEXT: snez a2, a0
; RV64ZBB-NEXT: add a1, a1, a2
; RV64ZBB-NEXT: neg a1, a1
; RV64ZBB-NEXT: neg a0, a0
; RV64ZBB-NEXT: .LBB32_2:
  %sub = sub nsw i128 %a, %b
  %abs = call i128 @llvm.abs.i128(i128 %sub, i1 true)
  ret i128 %abs
}
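
; With the poison-on-INT_MIN form of llvm.abs (i1 true), the generated code is
; identical to the i1 false variant above; this lowering does not currently
; exploit the stronger flag.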

define i32 @abd_sub_i32(i32 %a, i32 %b) nounwind {
; RV32I-LABEL: abd_sub_i32:
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: sub a0, a0, a1
; RV64I-LABEL: abd_sub_i32:
; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: sraiw a1, a0, 31
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: subw a0, a0, a1
; RV32ZBB-LABEL: abd_sub_i32:
; RV32ZBB-NEXT: sub a0, a0, a1
; RV32ZBB-NEXT: neg a1, a0
; RV32ZBB-NEXT: max a0, a0, a1
; RV64ZBB-LABEL: abd_sub_i32:
; RV64ZBB-NEXT: subw a0, a0, a1
; RV64ZBB-NEXT: negw a1, a0
; RV64ZBB-NEXT: max a0, a0, a1
  %sub = sub i32 %a, %b
  %abs = call i32 @llvm.abs.i32(i32 %sub, i1 false)
  ret i32 %abs
}
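
; On RV64 the i32 variant uses the W-form instructions (subw, sraiw, negw) so
; each intermediate result stays sign-extended in the 64-bit registers.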

; sub(select(icmp(a,b),a,b),select(icmp(a,b),b,a)) -> abds(a,b)
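
; The select form above is equivalent to sub(smax(a,b),smin(a,b)). A minimal
; hand-written sketch of that max/min formulation follows (illustrative only:
; the function name is ours and it carries no autogenerated assertions; it
; reuses the llvm.smax/llvm.smin declarations at the end of this file):

define i32 @abd_minmax_sketch_i32(i32 %a, i32 %b) nounwind {
  %max = call i32 @llvm.smax.i32(i32 %a, i32 %b) ; larger operand
  %min = call i32 @llvm.smin.i32(i32 %a, i32 %b) ; smaller operand
  %sub = sub i32 %max, %min                      ; |a - b|, i.e. abds(a,b)
  ret i32 %sub
}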

define i8 @abd_select_i8(i8 %a, i8 %b) nounwind {
; RV32I-LABEL: abd_select_i8:
; RV32I-NEXT: slli a1, a1, 24
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: srai a1, a1, 24
; RV32I-NEXT: srai a0, a0, 24
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: sub a0, a0, a1
; RV64I-LABEL: abd_select_i8:
; RV64I-NEXT: slli a1, a1, 56
; RV64I-NEXT: slli a0, a0, 56
; RV64I-NEXT: srai a1, a1, 56
; RV64I-NEXT: srai a0, a0, 56
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: srai a1, a0, 63
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: sub a0, a0, a1
; ZBB-LABEL: abd_select_i8:
; ZBB-NEXT: sext.b a1, a1
; ZBB-NEXT: sext.b a0, a0
; ZBB-NEXT: min a2, a0, a1
; ZBB-NEXT: max a0, a0, a1
; ZBB-NEXT: sub a0, a0, a2
  %cmp = icmp slt i8 %a, %b
  %ab = select i1 %cmp, i8 %a, i8 %b
  %ba = select i1 %cmp, i8 %b, i8 %a
  %sub = sub i8 %ba, %ab
  ret i8 %sub
}

define i16 @abd_select_i16(i16 %a, i16 %b) nounwind {
; RV32I-LABEL: abd_select_i16:
; RV32I-NEXT: slli a1, a1, 16
; RV32I-NEXT: slli a0, a0, 16
; RV32I-NEXT: srai a1, a1, 16
; RV32I-NEXT: srai a0, a0, 16
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: sub a0, a0, a1
; RV64I-LABEL: abd_select_i16:
; RV64I-NEXT: slli a1, a1, 48
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srai a1, a1, 48
; RV64I-NEXT: srai a0, a0, 48
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: srai a1, a0, 63
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: sub a0, a0, a1
; ZBB-LABEL: abd_select_i16:
; ZBB-NEXT: sext.h a1, a1
; ZBB-NEXT: sext.h a0, a0
; ZBB-NEXT: min a2, a0, a1
; ZBB-NEXT: max a0, a0, a1
; ZBB-NEXT: sub a0, a0, a2
  %cmp = icmp sle i16 %a, %b
  %ab = select i1 %cmp, i16 %a, i16 %b
  %ba = select i1 %cmp, i16 %b, i16 %a
  %sub = sub i16 %ba, %ab
  ret i16 %sub
}

define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
; RV32I-LABEL: abd_select_i32:
; RV32I-NEXT: blt a1, a0, .LBB36_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sub a0, a1, a0
; RV32I-NEXT: .LBB36_2:
; RV32I-NEXT: sub a0, a0, a1
; RV64I-LABEL: abd_select_i32:
; RV64I-NEXT: sext.w a1, a1
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: srai a1, a0, 63
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: sub a0, a0, a1
; RV32ZBB-LABEL: abd_select_i32:
; RV32ZBB-NEXT: min a2, a0, a1
; RV32ZBB-NEXT: max a0, a0, a1
; RV32ZBB-NEXT: sub a0, a0, a2
; RV64ZBB-LABEL: abd_select_i32:
; RV64ZBB-NEXT: sext.w a1, a1
; RV64ZBB-NEXT: sext.w a0, a0
; RV64ZBB-NEXT: min a2, a0, a1
; RV64ZBB-NEXT: max a0, a0, a1
; RV64ZBB-NEXT: sub a0, a0, a2
  %cmp = icmp sgt i32 %a, %b
  %ab = select i1 %cmp, i32 %a, i32 %b
  %ba = select i1 %cmp, i32 %b, i32 %a
  %sub = sub i32 %ab, %ba
  ret i32 %sub
}
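
; Without Zbb, RV32 keeps a compare-and-branch shape for the i32 selects,
; while RV64 sign-extends with sext.w and uses the srai/xor/sub absolute-value
; idiom; with Zbb both targets collapse to min/max/sub.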

define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
; RV32I-LABEL: abd_select_i64:
; RV32I-NEXT: sltu a4, a2, a0
; RV32I-NEXT: mv a5, a4
; RV32I-NEXT: beq a1, a3, .LBB37_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: slt a5, a3, a1
; RV32I-NEXT: .LBB37_2:
; RV32I-NEXT: bnez a5, .LBB37_4
; RV32I-NEXT: # %bb.3:
; RV32I-NEXT: sub a1, a3, a1
; RV32I-NEXT: sub a1, a1, a4
; RV32I-NEXT: sub a0, a2, a0
; RV32I-NEXT: .LBB37_4:
; RV32I-NEXT: sltu a4, a0, a2
; RV32I-NEXT: sub a1, a1, a3
; RV32I-NEXT: sub a1, a1, a4
; RV32I-NEXT: sub a0, a0, a2
; RV64I-LABEL: abd_select_i64:
; RV64I-NEXT: blt a1, a0, .LBB37_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: sub a0, a1, a0
; RV64I-NEXT: .LBB37_2:
; RV64I-NEXT: sub a0, a0, a1
; RV32ZBB-LABEL: abd_select_i64:
; RV32ZBB-NEXT: sltu a4, a2, a0
; RV32ZBB-NEXT: mv a5, a4
; RV32ZBB-NEXT: beq a1, a3, .LBB37_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: slt a5, a3, a1
; RV32ZBB-NEXT: .LBB37_2:
; RV32ZBB-NEXT: bnez a5, .LBB37_4
; RV32ZBB-NEXT: # %bb.3:
; RV32ZBB-NEXT: sub a1, a3, a1
; RV32ZBB-NEXT: sub a1, a1, a4
; RV32ZBB-NEXT: sub a0, a2, a0
; RV32ZBB-NEXT: .LBB37_4:
; RV32ZBB-NEXT: sltu a4, a0, a2
; RV32ZBB-NEXT: sub a1, a1, a3
; RV32ZBB-NEXT: sub a1, a1, a4
; RV32ZBB-NEXT: sub a0, a0, a2
; RV64ZBB-LABEL: abd_select_i64:
; RV64ZBB-NEXT: min a2, a0, a1
; RV64ZBB-NEXT: max a0, a0, a1
; RV64ZBB-NEXT: sub a0, a0, a2
  %cmp = icmp sge i64 %a, %b
  %ab = select i1 %cmp, i64 %a, i64 %b
  %ba = select i1 %cmp, i64 %b, i64 %a
  %sub = sub i64 %ab, %ba
  ret i64 %sub
}
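
; For i64 on RV32 the compare has to be done by hand: sltu/slt plus a beq on
; the high words pick the larger operand, then the subtraction runs in that
; order with an sltu borrow into the high word. RV64 with Zbb again reduces
; the whole pattern to min/max/sub.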

define i128 @abd_select_i128(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: abd_select_i128:
; RV32I-NEXT: lw a3, 0(a1)
; RV32I-NEXT: lw a4, 4(a1)
; RV32I-NEXT: lw a6, 8(a1)
; RV32I-NEXT: lw t0, 12(a1)
; RV32I-NEXT: lw a7, 8(a2)
; RV32I-NEXT: lw t1, 12(a2)
; RV32I-NEXT: lw a5, 0(a2)
; RV32I-NEXT: lw a1, 4(a2)
; RV32I-NEXT: sltu a2, a7, a6
; RV32I-NEXT: mv t4, a2
; RV32I-NEXT: beq t0, t1, .LBB38_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: slt t4, t1, t0
; RV32I-NEXT: .LBB38_2:
; RV32I-NEXT: sltu t2, a5, a3
; RV32I-NEXT: sltu t5, a1, a4
; RV32I-NEXT: mv t3, t2
; RV32I-NEXT: beq a4, a1, .LBB38_4
; RV32I-NEXT: # %bb.3:
; RV32I-NEXT: mv t3, t5
; RV32I-NEXT: .LBB38_4:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: xor t6, t0, t1
; RV32I-NEXT: xor s0, a6, a7
; RV32I-NEXT: or t6, s0, t6
; RV32I-NEXT: beqz t6, .LBB38_6
; RV32I-NEXT: # %bb.5:
; RV32I-NEXT: mv t3, t4
; RV32I-NEXT: .LBB38_6:
; RV32I-NEXT: mv t4, t2
; RV32I-NEXT: beq a1, a4, .LBB38_8
; RV32I-NEXT: # %bb.7:
; RV32I-NEXT: mv t4, t5
; RV32I-NEXT: .LBB38_8:
; RV32I-NEXT: sltu t5, a3, a5
; RV32I-NEXT: mv t6, t5
; RV32I-NEXT: beq a4, a1, .LBB38_10
; RV32I-NEXT: # %bb.9:
; RV32I-NEXT: sltu t6, a4, a1
; RV32I-NEXT: .LBB38_10:
; RV32I-NEXT: bnez t3, .LBB38_12
; RV32I-NEXT: # %bb.11:
; RV32I-NEXT: sub t0, t1, t0
; RV32I-NEXT: sub a6, a7, a6
; RV32I-NEXT: sub a3, a5, a3
; RV32I-NEXT: sub a1, a1, a4
; RV32I-NEXT: sub a4, t0, a2
; RV32I-NEXT: sltu a5, a6, t4
; RV32I-NEXT: sub a2, a1, t2
; RV32I-NEXT: sub a1, a4, a5
; RV32I-NEXT: sub a4, a6, t4
; RV32I-NEXT: j .LBB38_13
; RV32I-NEXT: .LBB38_12:
; RV32I-NEXT: sltu a2, a6, a7
; RV32I-NEXT: sub t0, t0, t1
; RV32I-NEXT: sub a6, a6, a7
; RV32I-NEXT: sub a3, a3, a5
; RV32I-NEXT: sub a4, a4, a1
; RV32I-NEXT: sub a1, t0, a2
; RV32I-NEXT: sltu a5, a6, t6
; RV32I-NEXT: sub a2, a4, t5
; RV32I-NEXT: sub a1, a1, a5
; RV32I-NEXT: sub a4, a6, t6
; RV32I-NEXT: .LBB38_13:
; RV32I-NEXT: sw a3, 0(a0)
; RV32I-NEXT: sw a2, 4(a0)
; RV32I-NEXT: sw a4, 8(a0)
; RV32I-NEXT: sw a1, 12(a0)
; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV64I-LABEL: abd_select_i128:
; RV64I-NEXT: sltu a4, a2, a0
; RV64I-NEXT: mv a5, a4
; RV64I-NEXT: beq a1, a3, .LBB38_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: slt a5, a3, a1
; RV64I-NEXT: .LBB38_2:
; RV64I-NEXT: bnez a5, .LBB38_4
; RV64I-NEXT: # %bb.3:
; RV64I-NEXT: sub a1, a3, a1
; RV64I-NEXT: sub a1, a1, a4
; RV64I-NEXT: sub a0, a2, a0
; RV64I-NEXT: .LBB38_4:
; RV64I-NEXT: sltu a4, a0, a2
; RV64I-NEXT: sub a1, a1, a3
; RV64I-NEXT: sub a1, a1, a4
; RV64I-NEXT: sub a0, a0, a2
; RV32ZBB-LABEL: abd_select_i128:
; RV32ZBB-NEXT: lw a3, 0(a1)
; RV32ZBB-NEXT: lw a4, 4(a1)
; RV32ZBB-NEXT: lw a6, 8(a1)
; RV32ZBB-NEXT: lw t0, 12(a1)
; RV32ZBB-NEXT: lw a7, 8(a2)
; RV32ZBB-NEXT: lw t1, 12(a2)
; RV32ZBB-NEXT: lw a5, 0(a2)
; RV32ZBB-NEXT: lw a1, 4(a2)
; RV32ZBB-NEXT: sltu a2, a7, a6
; RV32ZBB-NEXT: mv t4, a2
; RV32ZBB-NEXT: beq t0, t1, .LBB38_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: slt t4, t1, t0
; RV32ZBB-NEXT: .LBB38_2:
; RV32ZBB-NEXT: sltu t2, a5, a3
; RV32ZBB-NEXT: sltu t5, a1, a4
; RV32ZBB-NEXT: mv t3, t2
; RV32ZBB-NEXT: beq a4, a1, .LBB38_4
; RV32ZBB-NEXT: # %bb.3:
; RV32ZBB-NEXT: mv t3, t5
; RV32ZBB-NEXT: .LBB38_4:
; RV32ZBB-NEXT: addi sp, sp, -16
; RV32ZBB-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZBB-NEXT: xor t6, t0, t1
; RV32ZBB-NEXT: xor s0, a6, a7
; RV32ZBB-NEXT: or t6, s0, t6
; RV32ZBB-NEXT: beqz t6, .LBB38_6
; RV32ZBB-NEXT: # %bb.5:
; RV32ZBB-NEXT: mv t3, t4
; RV32ZBB-NEXT: .LBB38_6:
; RV32ZBB-NEXT: mv t4, t2
; RV32ZBB-NEXT: beq a1, a4, .LBB38_8
; RV32ZBB-NEXT: # %bb.7:
; RV32ZBB-NEXT: mv t4, t5
; RV32ZBB-NEXT: .LBB38_8:
; RV32ZBB-NEXT: sltu t5, a3, a5
; RV32ZBB-NEXT: mv t6, t5
; RV32ZBB-NEXT: beq a4, a1, .LBB38_10
; RV32ZBB-NEXT: # %bb.9:
; RV32ZBB-NEXT: sltu t6, a4, a1
; RV32ZBB-NEXT: .LBB38_10:
; RV32ZBB-NEXT: bnez t3, .LBB38_12
; RV32ZBB-NEXT: # %bb.11:
; RV32ZBB-NEXT: sub t0, t1, t0
; RV32ZBB-NEXT: sub a6, a7, a6
; RV32ZBB-NEXT: sub a3, a5, a3
; RV32ZBB-NEXT: sub a1, a1, a4
; RV32ZBB-NEXT: sub a4, t0, a2
; RV32ZBB-NEXT: sltu a5, a6, t4
; RV32ZBB-NEXT: sub a2, a1, t2
; RV32ZBB-NEXT: sub a1, a4, a5
; RV32ZBB-NEXT: sub a4, a6, t4
; RV32ZBB-NEXT: j .LBB38_13
; RV32ZBB-NEXT: .LBB38_12:
; RV32ZBB-NEXT: sltu a2, a6, a7
; RV32ZBB-NEXT: sub t0, t0, t1
; RV32ZBB-NEXT: sub a6, a6, a7
; RV32ZBB-NEXT: sub a3, a3, a5
; RV32ZBB-NEXT: sub a4, a4, a1
; RV32ZBB-NEXT: sub a1, t0, a2
; RV32ZBB-NEXT: sltu a5, a6, t6
; RV32ZBB-NEXT: sub a2, a4, t5
; RV32ZBB-NEXT: sub a1, a1, a5
; RV32ZBB-NEXT: sub a4, a6, t6
; RV32ZBB-NEXT: .LBB38_13:
; RV32ZBB-NEXT: sw a3, 0(a0)
; RV32ZBB-NEXT: sw a2, 4(a0)
; RV32ZBB-NEXT: sw a4, 8(a0)
; RV32ZBB-NEXT: sw a1, 12(a0)
; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZBB-NEXT: addi sp, sp, 16
; RV64ZBB-LABEL: abd_select_i128:
; RV64ZBB-NEXT: sltu a4, a2, a0
; RV64ZBB-NEXT: mv a5, a4
; RV64ZBB-NEXT: beq a1, a3, .LBB38_2
; RV64ZBB-NEXT: # %bb.1:
; RV64ZBB-NEXT: slt a5, a3, a1
; RV64ZBB-NEXT: .LBB38_2:
; RV64ZBB-NEXT: bnez a5, .LBB38_4
; RV64ZBB-NEXT: # %bb.3:
; RV64ZBB-NEXT: sub a1, a3, a1
; RV64ZBB-NEXT: sub a1, a1, a4
; RV64ZBB-NEXT: sub a0, a2, a0
; RV64ZBB-NEXT: .LBB38_4:
; RV64ZBB-NEXT: sltu a4, a0, a2
; RV64ZBB-NEXT: sub a1, a1, a3
; RV64ZBB-NEXT: sub a1, a1, a4
; RV64ZBB-NEXT: sub a0, a0, a2
  %cmp = icmp slt i128 %a, %b
  %ab = select i1 %cmp, i128 %a, i128 %b
  %ba = select i1 %cmp, i128 %b, i128 %a
  %sub = sub i128 %ba, %ab
  ret i128 %sub
}

declare i8 @llvm.abs.i8(i8, i1)
declare i16 @llvm.abs.i16(i16, i1)
declare i32 @llvm.abs.i32(i32, i1)
declare i64 @llvm.abs.i64(i64, i1)
declare i128 @llvm.abs.i128(i128, i1)

declare i8 @llvm.smax.i8(i8, i8)
declare i16 @llvm.smax.i16(i16, i16)
declare i32 @llvm.smax.i32(i32, i32)
declare i64 @llvm.smax.i64(i64, i64)

declare i8 @llvm.smin.i8(i8, i8)
declare i16 @llvm.smin.i16(i16, i16)
declare i32 @llvm.smin.i32(i32, i32)
declare i64 @llvm.smin.i64(i64, i64)