1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=riscv32 | FileCheck %s --check-prefixes=RV32I
3 ; RUN: llc < %s -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64I
4 ; RUN: llc < %s -mtriple=riscv32 -mattr=+zbb | FileCheck %s --check-prefixes=ZBB,RV32ZBB
5 ; RUN: llc < %s -mtriple=riscv64 -mattr=+zbb | FileCheck %s --check-prefixes=ZBB,RV64ZBB
7 ; trunc(nabs(sub(sext(a),sext(b)))) -> nabds(a,b)
10 define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
11 ; RV32I-LABEL: abd_ext_i8:
13 ; RV32I-NEXT: slli a1, a1, 24
14 ; RV32I-NEXT: slli a0, a0, 24
15 ; RV32I-NEXT: srai a1, a1, 24
16 ; RV32I-NEXT: srai a0, a0, 24
17 ; RV32I-NEXT: sub a0, a0, a1
18 ; RV32I-NEXT: srai a1, a0, 31
19 ; RV32I-NEXT: xor a0, a0, a1
20 ; RV32I-NEXT: sub a0, a1, a0
23 ; RV64I-LABEL: abd_ext_i8:
25 ; RV64I-NEXT: slli a0, a0, 56
26 ; RV64I-NEXT: slli a1, a1, 56
27 ; RV64I-NEXT: srai a0, a0, 56
28 ; RV64I-NEXT: srai a1, a1, 56
29 ; RV64I-NEXT: sub a0, a0, a1
30 ; RV64I-NEXT: srai a1, a0, 63
31 ; RV64I-NEXT: xor a0, a0, a1
32 ; RV64I-NEXT: sub a0, a1, a0
35 ; RV32ZBB-LABEL: abd_ext_i8:
37 ; RV32ZBB-NEXT: sext.b a1, a1
38 ; RV32ZBB-NEXT: sext.b a0, a0
39 ; RV32ZBB-NEXT: max a2, a0, a1
40 ; RV32ZBB-NEXT: min a0, a0, a1
41 ; RV32ZBB-NEXT: sub a0, a0, a2
44 ; RV64ZBB-LABEL: abd_ext_i8:
46 ; RV64ZBB-NEXT: sext.b a0, a0
47 ; RV64ZBB-NEXT: sext.b a1, a1
48 ; RV64ZBB-NEXT: sub a0, a0, a1
49 ; RV64ZBB-NEXT: neg a1, a0
50 ; RV64ZBB-NEXT: min a0, a0, a1
52 %aext = sext i8 %a to i64
53 %bext = sext i8 %b to i64
54 %sub = sub i64 %aext, %bext
55 %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
56 %nabs = sub i64 0, %abs
57 %trunc = trunc i64 %nabs to i8
61 define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
62 ; RV32I-LABEL: abd_ext_i8_i16:
64 ; RV32I-NEXT: slli a1, a1, 16
65 ; RV32I-NEXT: slli a0, a0, 24
66 ; RV32I-NEXT: srai a1, a1, 16
67 ; RV32I-NEXT: srai a0, a0, 24
68 ; RV32I-NEXT: sub a0, a0, a1
69 ; RV32I-NEXT: srai a1, a0, 31
70 ; RV32I-NEXT: xor a0, a0, a1
71 ; RV32I-NEXT: sub a0, a1, a0
74 ; RV64I-LABEL: abd_ext_i8_i16:
76 ; RV64I-NEXT: slli a0, a0, 56
77 ; RV64I-NEXT: slli a1, a1, 48
78 ; RV64I-NEXT: srai a0, a0, 56
79 ; RV64I-NEXT: srai a1, a1, 48
80 ; RV64I-NEXT: sub a0, a0, a1
81 ; RV64I-NEXT: srai a1, a0, 63
82 ; RV64I-NEXT: xor a0, a0, a1
83 ; RV64I-NEXT: sub a0, a1, a0
86 ; RV32ZBB-LABEL: abd_ext_i8_i16:
88 ; RV32ZBB-NEXT: sext.h a1, a1
89 ; RV32ZBB-NEXT: sext.b a0, a0
90 ; RV32ZBB-NEXT: max a2, a0, a1
91 ; RV32ZBB-NEXT: min a0, a0, a1
92 ; RV32ZBB-NEXT: sub a0, a0, a2
95 ; RV64ZBB-LABEL: abd_ext_i8_i16:
97 ; RV64ZBB-NEXT: sext.b a0, a0
98 ; RV64ZBB-NEXT: sext.h a1, a1
99 ; RV64ZBB-NEXT: sub a0, a0, a1
100 ; RV64ZBB-NEXT: neg a1, a0
101 ; RV64ZBB-NEXT: min a0, a0, a1
103 %aext = sext i8 %a to i64
104 %bext = sext i16 %b to i64
105 %sub = sub i64 %aext, %bext
106 %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
107 %nabs = sub i64 0, %abs
108 %trunc = trunc i64 %nabs to i8
112 define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
113 ; RV32I-LABEL: abd_ext_i8_undef:
115 ; RV32I-NEXT: slli a1, a1, 24
116 ; RV32I-NEXT: slli a0, a0, 24
117 ; RV32I-NEXT: srai a1, a1, 24
118 ; RV32I-NEXT: srai a0, a0, 24
119 ; RV32I-NEXT: sub a0, a0, a1
120 ; RV32I-NEXT: srai a1, a0, 31
121 ; RV32I-NEXT: xor a0, a0, a1
122 ; RV32I-NEXT: sub a0, a1, a0
125 ; RV64I-LABEL: abd_ext_i8_undef:
127 ; RV64I-NEXT: slli a0, a0, 56
128 ; RV64I-NEXT: slli a1, a1, 56
129 ; RV64I-NEXT: srai a0, a0, 56
130 ; RV64I-NEXT: srai a1, a1, 56
131 ; RV64I-NEXT: sub a0, a0, a1
132 ; RV64I-NEXT: srai a1, a0, 63
133 ; RV64I-NEXT: xor a0, a0, a1
134 ; RV64I-NEXT: sub a0, a1, a0
137 ; RV32ZBB-LABEL: abd_ext_i8_undef:
139 ; RV32ZBB-NEXT: sext.b a1, a1
140 ; RV32ZBB-NEXT: sext.b a0, a0
141 ; RV32ZBB-NEXT: max a2, a0, a1
142 ; RV32ZBB-NEXT: min a0, a0, a1
143 ; RV32ZBB-NEXT: sub a0, a0, a2
146 ; RV64ZBB-LABEL: abd_ext_i8_undef:
148 ; RV64ZBB-NEXT: sext.b a0, a0
149 ; RV64ZBB-NEXT: sext.b a1, a1
150 ; RV64ZBB-NEXT: sub a0, a0, a1
151 ; RV64ZBB-NEXT: neg a1, a0
152 ; RV64ZBB-NEXT: min a0, a0, a1
154 %aext = sext i8 %a to i64
155 %bext = sext i8 %b to i64
156 %sub = sub i64 %aext, %bext
157 %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true)
158 %nabs = sub i64 0, %abs
159 %trunc = trunc i64 %nabs to i8
163 define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
164 ; RV32I-LABEL: abd_ext_i16:
166 ; RV32I-NEXT: slli a1, a1, 16
167 ; RV32I-NEXT: slli a0, a0, 16
168 ; RV32I-NEXT: srai a1, a1, 16
169 ; RV32I-NEXT: srai a0, a0, 16
170 ; RV32I-NEXT: sub a0, a0, a1
171 ; RV32I-NEXT: srai a1, a0, 31
172 ; RV32I-NEXT: xor a0, a0, a1
173 ; RV32I-NEXT: sub a0, a1, a0
176 ; RV64I-LABEL: abd_ext_i16:
178 ; RV64I-NEXT: slli a0, a0, 48
179 ; RV64I-NEXT: slli a1, a1, 48
180 ; RV64I-NEXT: srai a0, a0, 48
181 ; RV64I-NEXT: srai a1, a1, 48
182 ; RV64I-NEXT: sub a0, a0, a1
183 ; RV64I-NEXT: srai a1, a0, 63
184 ; RV64I-NEXT: xor a0, a0, a1
185 ; RV64I-NEXT: sub a0, a1, a0
188 ; RV32ZBB-LABEL: abd_ext_i16:
190 ; RV32ZBB-NEXT: sext.h a1, a1
191 ; RV32ZBB-NEXT: sext.h a0, a0
192 ; RV32ZBB-NEXT: max a2, a0, a1
193 ; RV32ZBB-NEXT: min a0, a0, a1
194 ; RV32ZBB-NEXT: sub a0, a0, a2
197 ; RV64ZBB-LABEL: abd_ext_i16:
199 ; RV64ZBB-NEXT: sext.h a0, a0
200 ; RV64ZBB-NEXT: sext.h a1, a1
201 ; RV64ZBB-NEXT: sub a0, a0, a1
202 ; RV64ZBB-NEXT: neg a1, a0
203 ; RV64ZBB-NEXT: min a0, a0, a1
205 %aext = sext i16 %a to i64
206 %bext = sext i16 %b to i64
207 %sub = sub i64 %aext, %bext
208 %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
209 %nabs = sub i64 0, %abs
210 %trunc = trunc i64 %nabs to i16
214 define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
215 ; RV32I-LABEL: abd_ext_i16_i32:
217 ; RV32I-NEXT: slli a0, a0, 16
218 ; RV32I-NEXT: srai a0, a0, 16
219 ; RV32I-NEXT: blt a1, a0, .LBB4_2
220 ; RV32I-NEXT: # %bb.1:
221 ; RV32I-NEXT: sub a0, a1, a0
222 ; RV32I-NEXT: neg a0, a0
224 ; RV32I-NEXT: .LBB4_2:
225 ; RV32I-NEXT: sub a0, a0, a1
226 ; RV32I-NEXT: neg a0, a0
229 ; RV64I-LABEL: abd_ext_i16_i32:
231 ; RV64I-NEXT: slli a0, a0, 48
232 ; RV64I-NEXT: srai a0, a0, 48
233 ; RV64I-NEXT: sext.w a1, a1
234 ; RV64I-NEXT: sub a0, a0, a1
235 ; RV64I-NEXT: srai a1, a0, 63
236 ; RV64I-NEXT: xor a0, a0, a1
237 ; RV64I-NEXT: sub a0, a1, a0
240 ; RV32ZBB-LABEL: abd_ext_i16_i32:
242 ; RV32ZBB-NEXT: sext.h a0, a0
243 ; RV32ZBB-NEXT: max a2, a0, a1
244 ; RV32ZBB-NEXT: min a0, a0, a1
245 ; RV32ZBB-NEXT: sub a0, a0, a2
248 ; RV64ZBB-LABEL: abd_ext_i16_i32:
250 ; RV64ZBB-NEXT: sext.h a0, a0
251 ; RV64ZBB-NEXT: sext.w a1, a1
252 ; RV64ZBB-NEXT: sub a0, a0, a1
253 ; RV64ZBB-NEXT: neg a1, a0
254 ; RV64ZBB-NEXT: min a0, a0, a1
256 %aext = sext i16 %a to i64
257 %bext = sext i32 %b to i64
258 %sub = sub i64 %aext, %bext
259 %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
260 %nabs = sub i64 0, %abs
261 %trunc = trunc i64 %nabs to i16
265 define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
266 ; RV32I-LABEL: abd_ext_i16_undef:
268 ; RV32I-NEXT: slli a1, a1, 16
269 ; RV32I-NEXT: slli a0, a0, 16
270 ; RV32I-NEXT: srai a1, a1, 16
271 ; RV32I-NEXT: srai a0, a0, 16
272 ; RV32I-NEXT: sub a0, a0, a1
273 ; RV32I-NEXT: srai a1, a0, 31
274 ; RV32I-NEXT: xor a0, a0, a1
275 ; RV32I-NEXT: sub a0, a1, a0
278 ; RV64I-LABEL: abd_ext_i16_undef:
280 ; RV64I-NEXT: slli a0, a0, 48
281 ; RV64I-NEXT: slli a1, a1, 48
282 ; RV64I-NEXT: srai a0, a0, 48
283 ; RV64I-NEXT: srai a1, a1, 48
284 ; RV64I-NEXT: sub a0, a0, a1
285 ; RV64I-NEXT: srai a1, a0, 63
286 ; RV64I-NEXT: xor a0, a0, a1
287 ; RV64I-NEXT: sub a0, a1, a0
290 ; RV32ZBB-LABEL: abd_ext_i16_undef:
292 ; RV32ZBB-NEXT: sext.h a1, a1
293 ; RV32ZBB-NEXT: sext.h a0, a0
294 ; RV32ZBB-NEXT: max a2, a0, a1
295 ; RV32ZBB-NEXT: min a0, a0, a1
296 ; RV32ZBB-NEXT: sub a0, a0, a2
299 ; RV64ZBB-LABEL: abd_ext_i16_undef:
301 ; RV64ZBB-NEXT: sext.h a0, a0
302 ; RV64ZBB-NEXT: sext.h a1, a1
303 ; RV64ZBB-NEXT: sub a0, a0, a1
304 ; RV64ZBB-NEXT: neg a1, a0
305 ; RV64ZBB-NEXT: min a0, a0, a1
307 %aext = sext i16 %a to i64
308 %bext = sext i16 %b to i64
309 %sub = sub i64 %aext, %bext
310 %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true)
311 %nabs = sub i64 0, %abs
312 %trunc = trunc i64 %nabs to i16
316 define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
317 ; RV32I-LABEL: abd_ext_i32:
319 ; RV32I-NEXT: blt a1, a0, .LBB6_2
320 ; RV32I-NEXT: # %bb.1:
321 ; RV32I-NEXT: sub a0, a1, a0
322 ; RV32I-NEXT: neg a0, a0
324 ; RV32I-NEXT: .LBB6_2:
325 ; RV32I-NEXT: sub a0, a0, a1
326 ; RV32I-NEXT: neg a0, a0
329 ; RV64I-LABEL: abd_ext_i32:
331 ; RV64I-NEXT: sext.w a0, a0
332 ; RV64I-NEXT: sext.w a1, a1
333 ; RV64I-NEXT: sub a0, a0, a1
334 ; RV64I-NEXT: srai a1, a0, 63
335 ; RV64I-NEXT: xor a0, a0, a1
336 ; RV64I-NEXT: sub a0, a1, a0
339 ; RV32ZBB-LABEL: abd_ext_i32:
341 ; RV32ZBB-NEXT: max a2, a0, a1
342 ; RV32ZBB-NEXT: min a0, a0, a1
343 ; RV32ZBB-NEXT: sub a0, a0, a2
346 ; RV64ZBB-LABEL: abd_ext_i32:
348 ; RV64ZBB-NEXT: sext.w a0, a0
349 ; RV64ZBB-NEXT: sext.w a1, a1
350 ; RV64ZBB-NEXT: sub a0, a0, a1
351 ; RV64ZBB-NEXT: neg a1, a0
352 ; RV64ZBB-NEXT: min a0, a0, a1
354 %aext = sext i32 %a to i64
355 %bext = sext i32 %b to i64
356 %sub = sub i64 %aext, %bext
357 %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
358 %nabs = sub i64 0, %abs
359 %trunc = trunc i64 %nabs to i32
363 define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
364 ; RV32I-LABEL: abd_ext_i32_i16:
366 ; RV32I-NEXT: slli a1, a1, 16
367 ; RV32I-NEXT: srai a1, a1, 16
368 ; RV32I-NEXT: blt a1, a0, .LBB7_2
369 ; RV32I-NEXT: # %bb.1:
370 ; RV32I-NEXT: sub a0, a1, a0
371 ; RV32I-NEXT: neg a0, a0
373 ; RV32I-NEXT: .LBB7_2:
374 ; RV32I-NEXT: sub a0, a0, a1
375 ; RV32I-NEXT: neg a0, a0
378 ; RV64I-LABEL: abd_ext_i32_i16:
380 ; RV64I-NEXT: sext.w a0, a0
381 ; RV64I-NEXT: slli a1, a1, 48
382 ; RV64I-NEXT: srai a1, a1, 48
383 ; RV64I-NEXT: sub a0, a0, a1
384 ; RV64I-NEXT: srai a1, a0, 63
385 ; RV64I-NEXT: xor a0, a0, a1
386 ; RV64I-NEXT: sub a0, a1, a0
389 ; RV32ZBB-LABEL: abd_ext_i32_i16:
391 ; RV32ZBB-NEXT: sext.h a1, a1
392 ; RV32ZBB-NEXT: max a2, a0, a1
393 ; RV32ZBB-NEXT: min a0, a0, a1
394 ; RV32ZBB-NEXT: sub a0, a0, a2
397 ; RV64ZBB-LABEL: abd_ext_i32_i16:
399 ; RV64ZBB-NEXT: sext.w a0, a0
400 ; RV64ZBB-NEXT: sext.h a1, a1
401 ; RV64ZBB-NEXT: sub a0, a0, a1
402 ; RV64ZBB-NEXT: neg a1, a0
403 ; RV64ZBB-NEXT: min a0, a0, a1
405 %aext = sext i32 %a to i64
406 %bext = sext i16 %b to i64
407 %sub = sub i64 %aext, %bext
408 %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
409 %nabs = sub i64 0, %abs
410 %trunc = trunc i64 %nabs to i32
414 define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
415 ; RV32I-LABEL: abd_ext_i32_undef:
417 ; RV32I-NEXT: blt a1, a0, .LBB8_2
418 ; RV32I-NEXT: # %bb.1:
419 ; RV32I-NEXT: sub a0, a1, a0
420 ; RV32I-NEXT: neg a0, a0
422 ; RV32I-NEXT: .LBB8_2:
423 ; RV32I-NEXT: sub a0, a0, a1
424 ; RV32I-NEXT: neg a0, a0
427 ; RV64I-LABEL: abd_ext_i32_undef:
429 ; RV64I-NEXT: sext.w a0, a0
430 ; RV64I-NEXT: sext.w a1, a1
431 ; RV64I-NEXT: sub a0, a0, a1
432 ; RV64I-NEXT: srai a1, a0, 63
433 ; RV64I-NEXT: xor a0, a0, a1
434 ; RV64I-NEXT: sub a0, a1, a0
437 ; RV32ZBB-LABEL: abd_ext_i32_undef:
439 ; RV32ZBB-NEXT: max a2, a0, a1
440 ; RV32ZBB-NEXT: min a0, a0, a1
441 ; RV32ZBB-NEXT: sub a0, a0, a2
444 ; RV64ZBB-LABEL: abd_ext_i32_undef:
446 ; RV64ZBB-NEXT: sext.w a0, a0
447 ; RV64ZBB-NEXT: sext.w a1, a1
448 ; RV64ZBB-NEXT: sub a0, a0, a1
449 ; RV64ZBB-NEXT: neg a1, a0
450 ; RV64ZBB-NEXT: min a0, a0, a1
452 %aext = sext i32 %a to i64
453 %bext = sext i32 %b to i64
454 %sub = sub i64 %aext, %bext
455 %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true)
456 %nabs = sub i64 0, %abs
457 %trunc = trunc i64 %nabs to i32
461 define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
462 ; RV32I-LABEL: abd_ext_i64:
464 ; RV32I-NEXT: sltu a4, a2, a0
465 ; RV32I-NEXT: mv a5, a4
466 ; RV32I-NEXT: beq a1, a3, .LBB9_2
467 ; RV32I-NEXT: # %bb.1:
468 ; RV32I-NEXT: slt a5, a3, a1
469 ; RV32I-NEXT: .LBB9_2:
470 ; RV32I-NEXT: bnez a5, .LBB9_4
471 ; RV32I-NEXT: # %bb.3:
472 ; RV32I-NEXT: sub a1, a3, a1
473 ; RV32I-NEXT: sub a1, a1, a4
474 ; RV32I-NEXT: sub a0, a2, a0
475 ; RV32I-NEXT: j .LBB9_5
476 ; RV32I-NEXT: .LBB9_4:
477 ; RV32I-NEXT: sltu a4, a0, a2
478 ; RV32I-NEXT: sub a1, a1, a3
479 ; RV32I-NEXT: sub a1, a1, a4
480 ; RV32I-NEXT: sub a0, a0, a2
481 ; RV32I-NEXT: .LBB9_5:
482 ; RV32I-NEXT: snez a2, a0
483 ; RV32I-NEXT: add a1, a1, a2
484 ; RV32I-NEXT: neg a1, a1
485 ; RV32I-NEXT: neg a0, a0
488 ; RV64I-LABEL: abd_ext_i64:
490 ; RV64I-NEXT: blt a1, a0, .LBB9_2
491 ; RV64I-NEXT: # %bb.1:
492 ; RV64I-NEXT: sub a0, a1, a0
493 ; RV64I-NEXT: neg a0, a0
495 ; RV64I-NEXT: .LBB9_2:
496 ; RV64I-NEXT: sub a0, a0, a1
497 ; RV64I-NEXT: neg a0, a0
500 ; RV32ZBB-LABEL: abd_ext_i64:
502 ; RV32ZBB-NEXT: sltu a4, a2, a0
503 ; RV32ZBB-NEXT: mv a5, a4
504 ; RV32ZBB-NEXT: beq a1, a3, .LBB9_2
505 ; RV32ZBB-NEXT: # %bb.1:
506 ; RV32ZBB-NEXT: slt a5, a3, a1
507 ; RV32ZBB-NEXT: .LBB9_2:
508 ; RV32ZBB-NEXT: bnez a5, .LBB9_4
509 ; RV32ZBB-NEXT: # %bb.3:
510 ; RV32ZBB-NEXT: sub a1, a3, a1
511 ; RV32ZBB-NEXT: sub a1, a1, a4
512 ; RV32ZBB-NEXT: sub a0, a2, a0
513 ; RV32ZBB-NEXT: j .LBB9_5
514 ; RV32ZBB-NEXT: .LBB9_4:
515 ; RV32ZBB-NEXT: sltu a4, a0, a2
516 ; RV32ZBB-NEXT: sub a1, a1, a3
517 ; RV32ZBB-NEXT: sub a1, a1, a4
518 ; RV32ZBB-NEXT: sub a0, a0, a2
519 ; RV32ZBB-NEXT: .LBB9_5:
520 ; RV32ZBB-NEXT: snez a2, a0
521 ; RV32ZBB-NEXT: add a1, a1, a2
522 ; RV32ZBB-NEXT: neg a1, a1
523 ; RV32ZBB-NEXT: neg a0, a0
526 ; RV64ZBB-LABEL: abd_ext_i64:
528 ; RV64ZBB-NEXT: max a2, a0, a1
529 ; RV64ZBB-NEXT: min a0, a0, a1
530 ; RV64ZBB-NEXT: sub a0, a0, a2
532 %aext = sext i64 %a to i128
533 %bext = sext i64 %b to i128
534 %sub = sub i128 %aext, %bext
535 %abs = call i128 @llvm.abs.i128(i128 %sub, i1 false)
536 %nabs = sub i128 0, %abs
537 %trunc = trunc i128 %nabs to i64
541 define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
542 ; RV32I-LABEL: abd_ext_i64_undef:
544 ; RV32I-NEXT: sltu a4, a2, a0
545 ; RV32I-NEXT: mv a5, a4
546 ; RV32I-NEXT: beq a1, a3, .LBB10_2
547 ; RV32I-NEXT: # %bb.1:
548 ; RV32I-NEXT: slt a5, a3, a1
549 ; RV32I-NEXT: .LBB10_2:
550 ; RV32I-NEXT: bnez a5, .LBB10_4
551 ; RV32I-NEXT: # %bb.3:
552 ; RV32I-NEXT: sub a1, a3, a1
553 ; RV32I-NEXT: sub a1, a1, a4
554 ; RV32I-NEXT: sub a0, a2, a0
555 ; RV32I-NEXT: j .LBB10_5
556 ; RV32I-NEXT: .LBB10_4:
557 ; RV32I-NEXT: sltu a4, a0, a2
558 ; RV32I-NEXT: sub a1, a1, a3
559 ; RV32I-NEXT: sub a1, a1, a4
560 ; RV32I-NEXT: sub a0, a0, a2
561 ; RV32I-NEXT: .LBB10_5:
562 ; RV32I-NEXT: snez a2, a0
563 ; RV32I-NEXT: add a1, a1, a2
564 ; RV32I-NEXT: neg a1, a1
565 ; RV32I-NEXT: neg a0, a0
568 ; RV64I-LABEL: abd_ext_i64_undef:
570 ; RV64I-NEXT: blt a1, a0, .LBB10_2
571 ; RV64I-NEXT: # %bb.1:
572 ; RV64I-NEXT: sub a0, a1, a0
573 ; RV64I-NEXT: neg a0, a0
575 ; RV64I-NEXT: .LBB10_2:
576 ; RV64I-NEXT: sub a0, a0, a1
577 ; RV64I-NEXT: neg a0, a0
580 ; RV32ZBB-LABEL: abd_ext_i64_undef:
582 ; RV32ZBB-NEXT: sltu a4, a2, a0
583 ; RV32ZBB-NEXT: mv a5, a4
584 ; RV32ZBB-NEXT: beq a1, a3, .LBB10_2
585 ; RV32ZBB-NEXT: # %bb.1:
586 ; RV32ZBB-NEXT: slt a5, a3, a1
587 ; RV32ZBB-NEXT: .LBB10_2:
588 ; RV32ZBB-NEXT: bnez a5, .LBB10_4
589 ; RV32ZBB-NEXT: # %bb.3:
590 ; RV32ZBB-NEXT: sub a1, a3, a1
591 ; RV32ZBB-NEXT: sub a1, a1, a4
592 ; RV32ZBB-NEXT: sub a0, a2, a0
593 ; RV32ZBB-NEXT: j .LBB10_5
594 ; RV32ZBB-NEXT: .LBB10_4:
595 ; RV32ZBB-NEXT: sltu a4, a0, a2
596 ; RV32ZBB-NEXT: sub a1, a1, a3
597 ; RV32ZBB-NEXT: sub a1, a1, a4
598 ; RV32ZBB-NEXT: sub a0, a0, a2
599 ; RV32ZBB-NEXT: .LBB10_5:
600 ; RV32ZBB-NEXT: snez a2, a0
601 ; RV32ZBB-NEXT: add a1, a1, a2
602 ; RV32ZBB-NEXT: neg a1, a1
603 ; RV32ZBB-NEXT: neg a0, a0
606 ; RV64ZBB-LABEL: abd_ext_i64_undef:
608 ; RV64ZBB-NEXT: max a2, a0, a1
609 ; RV64ZBB-NEXT: min a0, a0, a1
610 ; RV64ZBB-NEXT: sub a0, a0, a2
612 %aext = sext i64 %a to i128
613 %bext = sext i64 %b to i128
614 %sub = sub i128 %aext, %bext
615 %abs = call i128 @llvm.abs.i128(i128 %sub, i1 true)
616 %nabs = sub i128 0, %abs
617 %trunc = trunc i128 %nabs to i64
621 define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
622 ; RV32I-LABEL: abd_ext_i128:
624 ; RV32I-NEXT: lw a3, 0(a1)
625 ; RV32I-NEXT: lw a4, 4(a1)
626 ; RV32I-NEXT: lw a6, 8(a1)
627 ; RV32I-NEXT: lw t1, 12(a1)
628 ; RV32I-NEXT: lw t0, 8(a2)
629 ; RV32I-NEXT: lw t2, 12(a2)
630 ; RV32I-NEXT: lw a1, 0(a2)
631 ; RV32I-NEXT: lw a2, 4(a2)
632 ; RV32I-NEXT: sltu t3, t0, a6
633 ; RV32I-NEXT: mv t4, t3
634 ; RV32I-NEXT: beq t1, t2, .LBB11_2
635 ; RV32I-NEXT: # %bb.1:
636 ; RV32I-NEXT: slt t4, t2, t1
637 ; RV32I-NEXT: .LBB11_2:
638 ; RV32I-NEXT: sltu a5, a1, a3
639 ; RV32I-NEXT: sltu t6, a2, a4
640 ; RV32I-NEXT: mv a7, a5
641 ; RV32I-NEXT: beq a4, a2, .LBB11_4
642 ; RV32I-NEXT: # %bb.3:
643 ; RV32I-NEXT: mv a7, t6
644 ; RV32I-NEXT: .LBB11_4:
645 ; RV32I-NEXT: addi sp, sp, -16
646 ; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
647 ; RV32I-NEXT: xor t5, t1, t2
648 ; RV32I-NEXT: xor s0, a6, t0
649 ; RV32I-NEXT: or t5, s0, t5
650 ; RV32I-NEXT: beqz t5, .LBB11_6
651 ; RV32I-NEXT: # %bb.5:
652 ; RV32I-NEXT: mv a7, t4
653 ; RV32I-NEXT: .LBB11_6:
654 ; RV32I-NEXT: mv t5, a5
655 ; RV32I-NEXT: beq a2, a4, .LBB11_8
656 ; RV32I-NEXT: # %bb.7:
657 ; RV32I-NEXT: mv t5, t6
658 ; RV32I-NEXT: .LBB11_8:
659 ; RV32I-NEXT: sltu t4, a3, a1
660 ; RV32I-NEXT: mv t6, t4
661 ; RV32I-NEXT: beq a4, a2, .LBB11_10
662 ; RV32I-NEXT: # %bb.9:
663 ; RV32I-NEXT: sltu t6, a4, a2
664 ; RV32I-NEXT: .LBB11_10:
665 ; RV32I-NEXT: bnez a7, .LBB11_12
666 ; RV32I-NEXT: # %bb.11:
667 ; RV32I-NEXT: sub t1, t2, t1
668 ; RV32I-NEXT: sub a6, t0, a6
669 ; RV32I-NEXT: sub t0, t1, t3
670 ; RV32I-NEXT: sltu t1, a6, t5
671 ; RV32I-NEXT: sub t0, t0, t1
672 ; RV32I-NEXT: sub a6, a6, t5
673 ; RV32I-NEXT: j .LBB11_13
674 ; RV32I-NEXT: .LBB11_12:
675 ; RV32I-NEXT: sltu t3, a6, t0
676 ; RV32I-NEXT: sub t1, t1, t2
677 ; RV32I-NEXT: sub a6, a6, t0
678 ; RV32I-NEXT: sub t0, t1, t3
679 ; RV32I-NEXT: sltu t1, a6, t6
680 ; RV32I-NEXT: sub t0, t0, t1
681 ; RV32I-NEXT: sub a6, a6, t6
682 ; RV32I-NEXT: .LBB11_13:
683 ; RV32I-NEXT: snez t1, a6
684 ; RV32I-NEXT: add t0, t0, t1
685 ; RV32I-NEXT: bnez a7, .LBB11_15
686 ; RV32I-NEXT: # %bb.14:
687 ; RV32I-NEXT: sub a2, a2, a4
688 ; RV32I-NEXT: sub a2, a2, a5
689 ; RV32I-NEXT: sub a1, a1, a3
690 ; RV32I-NEXT: j .LBB11_16
691 ; RV32I-NEXT: .LBB11_15:
692 ; RV32I-NEXT: sub a4, a4, a2
693 ; RV32I-NEXT: sub a2, a4, t4
694 ; RV32I-NEXT: sub a1, a3, a1
695 ; RV32I-NEXT: .LBB11_16:
696 ; RV32I-NEXT: or a3, a1, a2
697 ; RV32I-NEXT: neg a4, a6
698 ; RV32I-NEXT: neg a5, t0
699 ; RV32I-NEXT: snez a6, a1
700 ; RV32I-NEXT: neg a1, a1
701 ; RV32I-NEXT: snez a3, a3
702 ; RV32I-NEXT: add a2, a2, a6
703 ; RV32I-NEXT: sltu a6, a4, a3
704 ; RV32I-NEXT: neg a2, a2
705 ; RV32I-NEXT: sub a4, a4, a3
706 ; RV32I-NEXT: sub a3, a5, a6
707 ; RV32I-NEXT: sw a1, 0(a0)
708 ; RV32I-NEXT: sw a2, 4(a0)
709 ; RV32I-NEXT: sw a4, 8(a0)
710 ; RV32I-NEXT: sw a3, 12(a0)
711 ; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
712 ; RV32I-NEXT: addi sp, sp, 16
715 ; RV64I-LABEL: abd_ext_i128:
717 ; RV64I-NEXT: sltu a4, a2, a0
718 ; RV64I-NEXT: mv a5, a4
719 ; RV64I-NEXT: beq a1, a3, .LBB11_2
720 ; RV64I-NEXT: # %bb.1:
721 ; RV64I-NEXT: slt a5, a3, a1
722 ; RV64I-NEXT: .LBB11_2:
723 ; RV64I-NEXT: bnez a5, .LBB11_4
724 ; RV64I-NEXT: # %bb.3:
725 ; RV64I-NEXT: sub a1, a3, a1
726 ; RV64I-NEXT: sub a1, a1, a4
727 ; RV64I-NEXT: sub a0, a2, a0
728 ; RV64I-NEXT: j .LBB11_5
729 ; RV64I-NEXT: .LBB11_4:
730 ; RV64I-NEXT: sltu a4, a0, a2
731 ; RV64I-NEXT: sub a1, a1, a3
732 ; RV64I-NEXT: sub a1, a1, a4
733 ; RV64I-NEXT: sub a0, a0, a2
734 ; RV64I-NEXT: .LBB11_5:
735 ; RV64I-NEXT: snez a2, a0
736 ; RV64I-NEXT: add a1, a1, a2
737 ; RV64I-NEXT: neg a1, a1
738 ; RV64I-NEXT: neg a0, a0
741 ; RV32ZBB-LABEL: abd_ext_i128:
743 ; RV32ZBB-NEXT: lw a3, 0(a1)
744 ; RV32ZBB-NEXT: lw a4, 4(a1)
745 ; RV32ZBB-NEXT: lw a6, 8(a1)
746 ; RV32ZBB-NEXT: lw t1, 12(a1)
747 ; RV32ZBB-NEXT: lw t0, 8(a2)
748 ; RV32ZBB-NEXT: lw t2, 12(a2)
749 ; RV32ZBB-NEXT: lw a1, 0(a2)
750 ; RV32ZBB-NEXT: lw a2, 4(a2)
751 ; RV32ZBB-NEXT: sltu t3, t0, a6
752 ; RV32ZBB-NEXT: mv t4, t3
753 ; RV32ZBB-NEXT: beq t1, t2, .LBB11_2
754 ; RV32ZBB-NEXT: # %bb.1:
755 ; RV32ZBB-NEXT: slt t4, t2, t1
756 ; RV32ZBB-NEXT: .LBB11_2:
757 ; RV32ZBB-NEXT: sltu a5, a1, a3
758 ; RV32ZBB-NEXT: sltu t6, a2, a4
759 ; RV32ZBB-NEXT: mv a7, a5
760 ; RV32ZBB-NEXT: beq a4, a2, .LBB11_4
761 ; RV32ZBB-NEXT: # %bb.3:
762 ; RV32ZBB-NEXT: mv a7, t6
763 ; RV32ZBB-NEXT: .LBB11_4:
764 ; RV32ZBB-NEXT: addi sp, sp, -16
765 ; RV32ZBB-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
766 ; RV32ZBB-NEXT: xor t5, t1, t2
767 ; RV32ZBB-NEXT: xor s0, a6, t0
768 ; RV32ZBB-NEXT: or t5, s0, t5
769 ; RV32ZBB-NEXT: beqz t5, .LBB11_6
770 ; RV32ZBB-NEXT: # %bb.5:
771 ; RV32ZBB-NEXT: mv a7, t4
772 ; RV32ZBB-NEXT: .LBB11_6:
773 ; RV32ZBB-NEXT: mv t5, a5
774 ; RV32ZBB-NEXT: beq a2, a4, .LBB11_8
775 ; RV32ZBB-NEXT: # %bb.7:
776 ; RV32ZBB-NEXT: mv t5, t6
777 ; RV32ZBB-NEXT: .LBB11_8:
778 ; RV32ZBB-NEXT: sltu t4, a3, a1
779 ; RV32ZBB-NEXT: mv t6, t4
780 ; RV32ZBB-NEXT: beq a4, a2, .LBB11_10
781 ; RV32ZBB-NEXT: # %bb.9:
782 ; RV32ZBB-NEXT: sltu t6, a4, a2
783 ; RV32ZBB-NEXT: .LBB11_10:
784 ; RV32ZBB-NEXT: bnez a7, .LBB11_12
785 ; RV32ZBB-NEXT: # %bb.11:
786 ; RV32ZBB-NEXT: sub t1, t2, t1
787 ; RV32ZBB-NEXT: sub a6, t0, a6
788 ; RV32ZBB-NEXT: sub t0, t1, t3
789 ; RV32ZBB-NEXT: sltu t1, a6, t5
790 ; RV32ZBB-NEXT: sub t0, t0, t1
791 ; RV32ZBB-NEXT: sub a6, a6, t5
792 ; RV32ZBB-NEXT: j .LBB11_13
793 ; RV32ZBB-NEXT: .LBB11_12:
794 ; RV32ZBB-NEXT: sltu t3, a6, t0
795 ; RV32ZBB-NEXT: sub t1, t1, t2
796 ; RV32ZBB-NEXT: sub a6, a6, t0
797 ; RV32ZBB-NEXT: sub t0, t1, t3
798 ; RV32ZBB-NEXT: sltu t1, a6, t6
799 ; RV32ZBB-NEXT: sub t0, t0, t1
800 ; RV32ZBB-NEXT: sub a6, a6, t6
801 ; RV32ZBB-NEXT: .LBB11_13:
802 ; RV32ZBB-NEXT: snez t1, a6
803 ; RV32ZBB-NEXT: add t0, t0, t1
804 ; RV32ZBB-NEXT: bnez a7, .LBB11_15
805 ; RV32ZBB-NEXT: # %bb.14:
806 ; RV32ZBB-NEXT: sub a2, a2, a4
807 ; RV32ZBB-NEXT: sub a2, a2, a5
808 ; RV32ZBB-NEXT: sub a1, a1, a3
809 ; RV32ZBB-NEXT: j .LBB11_16
810 ; RV32ZBB-NEXT: .LBB11_15:
811 ; RV32ZBB-NEXT: sub a4, a4, a2
812 ; RV32ZBB-NEXT: sub a2, a4, t4
813 ; RV32ZBB-NEXT: sub a1, a3, a1
814 ; RV32ZBB-NEXT: .LBB11_16:
815 ; RV32ZBB-NEXT: or a3, a1, a2
816 ; RV32ZBB-NEXT: neg a4, a6
817 ; RV32ZBB-NEXT: neg a5, t0
818 ; RV32ZBB-NEXT: snez a6, a1
819 ; RV32ZBB-NEXT: neg a1, a1
820 ; RV32ZBB-NEXT: snez a3, a3
821 ; RV32ZBB-NEXT: add a2, a2, a6
822 ; RV32ZBB-NEXT: sltu a6, a4, a3
823 ; RV32ZBB-NEXT: neg a2, a2
824 ; RV32ZBB-NEXT: sub a4, a4, a3
825 ; RV32ZBB-NEXT: sub a3, a5, a6
826 ; RV32ZBB-NEXT: sw a1, 0(a0)
827 ; RV32ZBB-NEXT: sw a2, 4(a0)
828 ; RV32ZBB-NEXT: sw a4, 8(a0)
829 ; RV32ZBB-NEXT: sw a3, 12(a0)
830 ; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
831 ; RV32ZBB-NEXT: addi sp, sp, 16
834 ; RV64ZBB-LABEL: abd_ext_i128:
836 ; RV64ZBB-NEXT: sltu a4, a2, a0
837 ; RV64ZBB-NEXT: mv a5, a4
838 ; RV64ZBB-NEXT: beq a1, a3, .LBB11_2
839 ; RV64ZBB-NEXT: # %bb.1:
840 ; RV64ZBB-NEXT: slt a5, a3, a1
841 ; RV64ZBB-NEXT: .LBB11_2:
842 ; RV64ZBB-NEXT: bnez a5, .LBB11_4
843 ; RV64ZBB-NEXT: # %bb.3:
844 ; RV64ZBB-NEXT: sub a1, a3, a1
845 ; RV64ZBB-NEXT: sub a1, a1, a4
846 ; RV64ZBB-NEXT: sub a0, a2, a0
847 ; RV64ZBB-NEXT: j .LBB11_5
848 ; RV64ZBB-NEXT: .LBB11_4:
849 ; RV64ZBB-NEXT: sltu a4, a0, a2
850 ; RV64ZBB-NEXT: sub a1, a1, a3
851 ; RV64ZBB-NEXT: sub a1, a1, a4
852 ; RV64ZBB-NEXT: sub a0, a0, a2
853 ; RV64ZBB-NEXT: .LBB11_5:
854 ; RV64ZBB-NEXT: snez a2, a0
855 ; RV64ZBB-NEXT: add a1, a1, a2
856 ; RV64ZBB-NEXT: neg a1, a1
857 ; RV64ZBB-NEXT: neg a0, a0
859 %aext = sext i128 %a to i256
860 %bext = sext i128 %b to i256
861 %sub = sub i256 %aext, %bext
862 %abs = call i256 @llvm.abs.i256(i256 %sub, i1 false)
863 %nabs = sub i256 0, %abs
864 %trunc = trunc i256 %nabs to i128
868 define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
869 ; RV32I-LABEL: abd_ext_i128_undef:
871 ; RV32I-NEXT: lw a3, 0(a1)
872 ; RV32I-NEXT: lw a4, 4(a1)
873 ; RV32I-NEXT: lw a6, 8(a1)
874 ; RV32I-NEXT: lw t1, 12(a1)
875 ; RV32I-NEXT: lw t0, 8(a2)
876 ; RV32I-NEXT: lw t2, 12(a2)
877 ; RV32I-NEXT: lw a1, 0(a2)
878 ; RV32I-NEXT: lw a2, 4(a2)
879 ; RV32I-NEXT: sltu t3, t0, a6
880 ; RV32I-NEXT: mv t4, t3
881 ; RV32I-NEXT: beq t1, t2, .LBB12_2
882 ; RV32I-NEXT: # %bb.1:
883 ; RV32I-NEXT: slt t4, t2, t1
884 ; RV32I-NEXT: .LBB12_2:
885 ; RV32I-NEXT: sltu a5, a1, a3
886 ; RV32I-NEXT: sltu t6, a2, a4
887 ; RV32I-NEXT: mv a7, a5
888 ; RV32I-NEXT: beq a4, a2, .LBB12_4
889 ; RV32I-NEXT: # %bb.3:
890 ; RV32I-NEXT: mv a7, t6
891 ; RV32I-NEXT: .LBB12_4:
892 ; RV32I-NEXT: addi sp, sp, -16
893 ; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
894 ; RV32I-NEXT: xor t5, t1, t2
895 ; RV32I-NEXT: xor s0, a6, t0
896 ; RV32I-NEXT: or t5, s0, t5
897 ; RV32I-NEXT: beqz t5, .LBB12_6
898 ; RV32I-NEXT: # %bb.5:
899 ; RV32I-NEXT: mv a7, t4
900 ; RV32I-NEXT: .LBB12_6:
901 ; RV32I-NEXT: mv t5, a5
902 ; RV32I-NEXT: beq a2, a4, .LBB12_8
903 ; RV32I-NEXT: # %bb.7:
904 ; RV32I-NEXT: mv t5, t6
905 ; RV32I-NEXT: .LBB12_8:
906 ; RV32I-NEXT: sltu t4, a3, a1
907 ; RV32I-NEXT: mv t6, t4
908 ; RV32I-NEXT: beq a4, a2, .LBB12_10
909 ; RV32I-NEXT: # %bb.9:
910 ; RV32I-NEXT: sltu t6, a4, a2
911 ; RV32I-NEXT: .LBB12_10:
912 ; RV32I-NEXT: bnez a7, .LBB12_12
913 ; RV32I-NEXT: # %bb.11:
914 ; RV32I-NEXT: sub t1, t2, t1
915 ; RV32I-NEXT: sub a6, t0, a6
916 ; RV32I-NEXT: sub t0, t1, t3
917 ; RV32I-NEXT: sltu t1, a6, t5
918 ; RV32I-NEXT: sub t0, t0, t1
919 ; RV32I-NEXT: sub a6, a6, t5
920 ; RV32I-NEXT: j .LBB12_13
921 ; RV32I-NEXT: .LBB12_12:
922 ; RV32I-NEXT: sltu t3, a6, t0
923 ; RV32I-NEXT: sub t1, t1, t2
924 ; RV32I-NEXT: sub a6, a6, t0
925 ; RV32I-NEXT: sub t0, t1, t3
926 ; RV32I-NEXT: sltu t1, a6, t6
927 ; RV32I-NEXT: sub t0, t0, t1
928 ; RV32I-NEXT: sub a6, a6, t6
929 ; RV32I-NEXT: .LBB12_13:
930 ; RV32I-NEXT: snez t1, a6
931 ; RV32I-NEXT: add t0, t0, t1
932 ; RV32I-NEXT: bnez a7, .LBB12_15
933 ; RV32I-NEXT: # %bb.14:
934 ; RV32I-NEXT: sub a2, a2, a4
935 ; RV32I-NEXT: sub a2, a2, a5
936 ; RV32I-NEXT: sub a1, a1, a3
937 ; RV32I-NEXT: j .LBB12_16
938 ; RV32I-NEXT: .LBB12_15:
939 ; RV32I-NEXT: sub a4, a4, a2
940 ; RV32I-NEXT: sub a2, a4, t4
941 ; RV32I-NEXT: sub a1, a3, a1
942 ; RV32I-NEXT: .LBB12_16:
943 ; RV32I-NEXT: or a3, a1, a2
944 ; RV32I-NEXT: neg a4, a6
945 ; RV32I-NEXT: neg a5, t0
946 ; RV32I-NEXT: snez a6, a1
947 ; RV32I-NEXT: neg a1, a1
948 ; RV32I-NEXT: snez a3, a3
949 ; RV32I-NEXT: add a2, a2, a6
950 ; RV32I-NEXT: sltu a6, a4, a3
951 ; RV32I-NEXT: neg a2, a2
952 ; RV32I-NEXT: sub a4, a4, a3
953 ; RV32I-NEXT: sub a3, a5, a6
954 ; RV32I-NEXT: sw a1, 0(a0)
955 ; RV32I-NEXT: sw a2, 4(a0)
956 ; RV32I-NEXT: sw a4, 8(a0)
957 ; RV32I-NEXT: sw a3, 12(a0)
958 ; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
959 ; RV32I-NEXT: addi sp, sp, 16
962 ; RV64I-LABEL: abd_ext_i128_undef:
964 ; RV64I-NEXT: sltu a4, a2, a0
965 ; RV64I-NEXT: mv a5, a4
966 ; RV64I-NEXT: beq a1, a3, .LBB12_2
967 ; RV64I-NEXT: # %bb.1:
968 ; RV64I-NEXT: slt a5, a3, a1
969 ; RV64I-NEXT: .LBB12_2:
970 ; RV64I-NEXT: bnez a5, .LBB12_4
971 ; RV64I-NEXT: # %bb.3:
972 ; RV64I-NEXT: sub a1, a3, a1
973 ; RV64I-NEXT: sub a1, a1, a4
974 ; RV64I-NEXT: sub a0, a2, a0
975 ; RV64I-NEXT: j .LBB12_5
976 ; RV64I-NEXT: .LBB12_4:
977 ; RV64I-NEXT: sltu a4, a0, a2
978 ; RV64I-NEXT: sub a1, a1, a3
979 ; RV64I-NEXT: sub a1, a1, a4
980 ; RV64I-NEXT: sub a0, a0, a2
981 ; RV64I-NEXT: .LBB12_5:
982 ; RV64I-NEXT: snez a2, a0
983 ; RV64I-NEXT: add a1, a1, a2
984 ; RV64I-NEXT: neg a1, a1
985 ; RV64I-NEXT: neg a0, a0
988 ; RV32ZBB-LABEL: abd_ext_i128_undef:
990 ; RV32ZBB-NEXT: lw a3, 0(a1)
991 ; RV32ZBB-NEXT: lw a4, 4(a1)
992 ; RV32ZBB-NEXT: lw a6, 8(a1)
993 ; RV32ZBB-NEXT: lw t1, 12(a1)
994 ; RV32ZBB-NEXT: lw t0, 8(a2)
995 ; RV32ZBB-NEXT: lw t2, 12(a2)
996 ; RV32ZBB-NEXT: lw a1, 0(a2)
997 ; RV32ZBB-NEXT: lw a2, 4(a2)
998 ; RV32ZBB-NEXT: sltu t3, t0, a6
999 ; RV32ZBB-NEXT: mv t4, t3
1000 ; RV32ZBB-NEXT: beq t1, t2, .LBB12_2
1001 ; RV32ZBB-NEXT: # %bb.1:
1002 ; RV32ZBB-NEXT: slt t4, t2, t1
1003 ; RV32ZBB-NEXT: .LBB12_2:
1004 ; RV32ZBB-NEXT: sltu a5, a1, a3
1005 ; RV32ZBB-NEXT: sltu t6, a2, a4
1006 ; RV32ZBB-NEXT: mv a7, a5
1007 ; RV32ZBB-NEXT: beq a4, a2, .LBB12_4
1008 ; RV32ZBB-NEXT: # %bb.3:
1009 ; RV32ZBB-NEXT: mv a7, t6
1010 ; RV32ZBB-NEXT: .LBB12_4:
1011 ; RV32ZBB-NEXT: addi sp, sp, -16
1012 ; RV32ZBB-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
1013 ; RV32ZBB-NEXT: xor t5, t1, t2
1014 ; RV32ZBB-NEXT: xor s0, a6, t0
1015 ; RV32ZBB-NEXT: or t5, s0, t5
1016 ; RV32ZBB-NEXT: beqz t5, .LBB12_6
1017 ; RV32ZBB-NEXT: # %bb.5:
1018 ; RV32ZBB-NEXT: mv a7, t4
1019 ; RV32ZBB-NEXT: .LBB12_6:
1020 ; RV32ZBB-NEXT: mv t5, a5
1021 ; RV32ZBB-NEXT: beq a2, a4, .LBB12_8
1022 ; RV32ZBB-NEXT: # %bb.7:
1023 ; RV32ZBB-NEXT: mv t5, t6
1024 ; RV32ZBB-NEXT: .LBB12_8:
1025 ; RV32ZBB-NEXT: sltu t4, a3, a1
1026 ; RV32ZBB-NEXT: mv t6, t4
1027 ; RV32ZBB-NEXT: beq a4, a2, .LBB12_10
1028 ; RV32ZBB-NEXT: # %bb.9:
1029 ; RV32ZBB-NEXT: sltu t6, a4, a2
1030 ; RV32ZBB-NEXT: .LBB12_10:
1031 ; RV32ZBB-NEXT: bnez a7, .LBB12_12
1032 ; RV32ZBB-NEXT: # %bb.11:
1033 ; RV32ZBB-NEXT: sub t1, t2, t1
1034 ; RV32ZBB-NEXT: sub a6, t0, a6
1035 ; RV32ZBB-NEXT: sub t0, t1, t3
1036 ; RV32ZBB-NEXT: sltu t1, a6, t5
1037 ; RV32ZBB-NEXT: sub t0, t0, t1
1038 ; RV32ZBB-NEXT: sub a6, a6, t5
1039 ; RV32ZBB-NEXT: j .LBB12_13
1040 ; RV32ZBB-NEXT: .LBB12_12:
1041 ; RV32ZBB-NEXT: sltu t3, a6, t0
1042 ; RV32ZBB-NEXT: sub t1, t1, t2
1043 ; RV32ZBB-NEXT: sub a6, a6, t0
1044 ; RV32ZBB-NEXT: sub t0, t1, t3
1045 ; RV32ZBB-NEXT: sltu t1, a6, t6
1046 ; RV32ZBB-NEXT: sub t0, t0, t1
1047 ; RV32ZBB-NEXT: sub a6, a6, t6
1048 ; RV32ZBB-NEXT: .LBB12_13:
1049 ; RV32ZBB-NEXT: snez t1, a6
1050 ; RV32ZBB-NEXT: add t0, t0, t1
1051 ; RV32ZBB-NEXT: bnez a7, .LBB12_15
1052 ; RV32ZBB-NEXT: # %bb.14:
1053 ; RV32ZBB-NEXT: sub a2, a2, a4
1054 ; RV32ZBB-NEXT: sub a2, a2, a5
1055 ; RV32ZBB-NEXT: sub a1, a1, a3
1056 ; RV32ZBB-NEXT: j .LBB12_16
1057 ; RV32ZBB-NEXT: .LBB12_15:
1058 ; RV32ZBB-NEXT: sub a4, a4, a2
1059 ; RV32ZBB-NEXT: sub a2, a4, t4
1060 ; RV32ZBB-NEXT: sub a1, a3, a1
1061 ; RV32ZBB-NEXT: .LBB12_16:
1062 ; RV32ZBB-NEXT: or a3, a1, a2
1063 ; RV32ZBB-NEXT: neg a4, a6
1064 ; RV32ZBB-NEXT: neg a5, t0
1065 ; RV32ZBB-NEXT: snez a6, a1
1066 ; RV32ZBB-NEXT: neg a1, a1
1067 ; RV32ZBB-NEXT: snez a3, a3
1068 ; RV32ZBB-NEXT: add a2, a2, a6
1069 ; RV32ZBB-NEXT: sltu a6, a4, a3
1070 ; RV32ZBB-NEXT: neg a2, a2
1071 ; RV32ZBB-NEXT: sub a4, a4, a3
1072 ; RV32ZBB-NEXT: sub a3, a5, a6
1073 ; RV32ZBB-NEXT: sw a1, 0(a0)
1074 ; RV32ZBB-NEXT: sw a2, 4(a0)
1075 ; RV32ZBB-NEXT: sw a4, 8(a0)
1076 ; RV32ZBB-NEXT: sw a3, 12(a0)
1077 ; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
1078 ; RV32ZBB-NEXT: addi sp, sp, 16
1081 ; RV64ZBB-LABEL: abd_ext_i128_undef:
1083 ; RV64ZBB-NEXT: sltu a4, a2, a0
1084 ; RV64ZBB-NEXT: mv a5, a4
1085 ; RV64ZBB-NEXT: beq a1, a3, .LBB12_2
1086 ; RV64ZBB-NEXT: # %bb.1:
1087 ; RV64ZBB-NEXT: slt a5, a3, a1
1088 ; RV64ZBB-NEXT: .LBB12_2:
1089 ; RV64ZBB-NEXT: bnez a5, .LBB12_4
1090 ; RV64ZBB-NEXT: # %bb.3:
1091 ; RV64ZBB-NEXT: sub a1, a3, a1
1092 ; RV64ZBB-NEXT: sub a1, a1, a4
1093 ; RV64ZBB-NEXT: sub a0, a2, a0
1094 ; RV64ZBB-NEXT: j .LBB12_5
1095 ; RV64ZBB-NEXT: .LBB12_4:
1096 ; RV64ZBB-NEXT: sltu a4, a0, a2
1097 ; RV64ZBB-NEXT: sub a1, a1, a3
1098 ; RV64ZBB-NEXT: sub a1, a1, a4
1099 ; RV64ZBB-NEXT: sub a0, a0, a2
1100 ; RV64ZBB-NEXT: .LBB12_5:
1101 ; RV64ZBB-NEXT: snez a2, a0
1102 ; RV64ZBB-NEXT: add a1, a1, a2
1103 ; RV64ZBB-NEXT: neg a1, a1
1104 ; RV64ZBB-NEXT: neg a0, a0
1106 %aext = sext i128 %a to i256
1107 %bext = sext i128 %b to i256
1108 %sub = sub i256 %aext, %bext
1109 %abs = call i256 @llvm.abs.i256(i256 %sub, i1 true)
1110 %nabs = sub i256 0, %abs
1111 %trunc = trunc i256 %nabs to i128
1116 ; sub(smin(a,b),smax(a,b)) -> nabds(a,b)
1119 define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
1120 ; RV32I-LABEL: abd_minmax_i8:
1122 ; RV32I-NEXT: slli a1, a1, 24
1123 ; RV32I-NEXT: slli a0, a0, 24
1124 ; RV32I-NEXT: srai a1, a1, 24
1125 ; RV32I-NEXT: srai a0, a0, 24
1126 ; RV32I-NEXT: mv a2, a0
1127 ; RV32I-NEXT: bge a0, a1, .LBB13_3
1128 ; RV32I-NEXT: # %bb.1:
1129 ; RV32I-NEXT: bge a1, a0, .LBB13_4
1130 ; RV32I-NEXT: .LBB13_2:
1131 ; RV32I-NEXT: sub a0, a2, a0
1133 ; RV32I-NEXT: .LBB13_3:
1134 ; RV32I-NEXT: mv a2, a1
1135 ; RV32I-NEXT: blt a1, a0, .LBB13_2
1136 ; RV32I-NEXT: .LBB13_4:
1137 ; RV32I-NEXT: sub a0, a2, a1
1140 ; RV64I-LABEL: abd_minmax_i8:
1142 ; RV64I-NEXT: slli a1, a1, 56
1143 ; RV64I-NEXT: slli a0, a0, 56
1144 ; RV64I-NEXT: srai a1, a1, 56
1145 ; RV64I-NEXT: srai a0, a0, 56
1146 ; RV64I-NEXT: mv a2, a0
1147 ; RV64I-NEXT: bge a0, a1, .LBB13_3
1148 ; RV64I-NEXT: # %bb.1:
1149 ; RV64I-NEXT: bge a1, a0, .LBB13_4
1150 ; RV64I-NEXT: .LBB13_2:
1151 ; RV64I-NEXT: sub a0, a2, a0
1153 ; RV64I-NEXT: .LBB13_3:
1154 ; RV64I-NEXT: mv a2, a1
1155 ; RV64I-NEXT: blt a1, a0, .LBB13_2
1156 ; RV64I-NEXT: .LBB13_4:
1157 ; RV64I-NEXT: sub a0, a2, a1
1160 ; ZBB-LABEL: abd_minmax_i8:
1162 ; ZBB-NEXT: sext.b a1, a1
1163 ; ZBB-NEXT: sext.b a0, a0
1164 ; ZBB-NEXT: min a2, a0, a1
1165 ; ZBB-NEXT: max a0, a0, a1
1166 ; ZBB-NEXT: sub a0, a2, a0
1168 %min = call i8 @llvm.smin.i8(i8 %a, i8 %b)
1169 %max = call i8 @llvm.smax.i8(i8 %a, i8 %b)
1170 %sub = sub i8 %min, %max
1174 define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
1175 ; RV32I-LABEL: abd_minmax_i16:
1177 ; RV32I-NEXT: slli a1, a1, 16
1178 ; RV32I-NEXT: slli a0, a0, 16
1179 ; RV32I-NEXT: srai a1, a1, 16
1180 ; RV32I-NEXT: srai a0, a0, 16
1181 ; RV32I-NEXT: mv a2, a0
1182 ; RV32I-NEXT: bge a0, a1, .LBB14_3
1183 ; RV32I-NEXT: # %bb.1:
1184 ; RV32I-NEXT: bge a1, a0, .LBB14_4
1185 ; RV32I-NEXT: .LBB14_2:
1186 ; RV32I-NEXT: sub a0, a2, a0
1188 ; RV32I-NEXT: .LBB14_3:
1189 ; RV32I-NEXT: mv a2, a1
1190 ; RV32I-NEXT: blt a1, a0, .LBB14_2
1191 ; RV32I-NEXT: .LBB14_4:
1192 ; RV32I-NEXT: sub a0, a2, a1
1195 ; RV64I-LABEL: abd_minmax_i16:
1197 ; RV64I-NEXT: slli a1, a1, 48
1198 ; RV64I-NEXT: slli a0, a0, 48
1199 ; RV64I-NEXT: srai a1, a1, 48
1200 ; RV64I-NEXT: srai a0, a0, 48
1201 ; RV64I-NEXT: mv a2, a0
1202 ; RV64I-NEXT: bge a0, a1, .LBB14_3
1203 ; RV64I-NEXT: # %bb.1:
1204 ; RV64I-NEXT: bge a1, a0, .LBB14_4
1205 ; RV64I-NEXT: .LBB14_2:
1206 ; RV64I-NEXT: sub a0, a2, a0
1208 ; RV64I-NEXT: .LBB14_3:
1209 ; RV64I-NEXT: mv a2, a1
1210 ; RV64I-NEXT: blt a1, a0, .LBB14_2
1211 ; RV64I-NEXT: .LBB14_4:
1212 ; RV64I-NEXT: sub a0, a2, a1
1215 ; ZBB-LABEL: abd_minmax_i16:
1217 ; ZBB-NEXT: sext.h a1, a1
1218 ; ZBB-NEXT: sext.h a0, a0
1219 ; ZBB-NEXT: min a2, a0, a1
1220 ; ZBB-NEXT: max a0, a0, a1
1221 ; ZBB-NEXT: sub a0, a2, a0
1223 %min = call i16 @llvm.smin.i16(i16 %a, i16 %b)
1224 %max = call i16 @llvm.smax.i16(i16 %a, i16 %b)
1225 %sub = sub i16 %min, %max
1229 define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
1230 ; RV32I-LABEL: abd_minmax_i32:
1232 ; RV32I-NEXT: mv a2, a0
1233 ; RV32I-NEXT: bge a0, a1, .LBB15_3
1234 ; RV32I-NEXT: # %bb.1:
1235 ; RV32I-NEXT: bge a1, a0, .LBB15_4
1236 ; RV32I-NEXT: .LBB15_2:
1237 ; RV32I-NEXT: sub a0, a2, a0
1239 ; RV32I-NEXT: .LBB15_3:
1240 ; RV32I-NEXT: mv a2, a1
1241 ; RV32I-NEXT: blt a1, a0, .LBB15_2
1242 ; RV32I-NEXT: .LBB15_4:
1243 ; RV32I-NEXT: sub a0, a2, a1
1246 ; RV64I-LABEL: abd_minmax_i32:
1248 ; RV64I-NEXT: sext.w a1, a1
1249 ; RV64I-NEXT: sext.w a0, a0
1250 ; RV64I-NEXT: mv a2, a0
1251 ; RV64I-NEXT: bge a0, a1, .LBB15_3
1252 ; RV64I-NEXT: # %bb.1:
1253 ; RV64I-NEXT: bge a1, a0, .LBB15_4
1254 ; RV64I-NEXT: .LBB15_2:
1255 ; RV64I-NEXT: subw a0, a2, a0
1257 ; RV64I-NEXT: .LBB15_3:
1258 ; RV64I-NEXT: mv a2, a1
1259 ; RV64I-NEXT: blt a1, a0, .LBB15_2
1260 ; RV64I-NEXT: .LBB15_4:
1261 ; RV64I-NEXT: subw a0, a2, a1
1264 ; RV32ZBB-LABEL: abd_minmax_i32:
1266 ; RV32ZBB-NEXT: min a2, a0, a1
1267 ; RV32ZBB-NEXT: max a0, a0, a1
1268 ; RV32ZBB-NEXT: sub a0, a2, a0
1271 ; RV64ZBB-LABEL: abd_minmax_i32:
1273 ; RV64ZBB-NEXT: sext.w a1, a1
1274 ; RV64ZBB-NEXT: sext.w a0, a0
1275 ; RV64ZBB-NEXT: min a2, a0, a1
1276 ; RV64ZBB-NEXT: max a0, a0, a1
1277 ; RV64ZBB-NEXT: subw a0, a2, a0
1279 %min = call i32 @llvm.smin.i32(i32 %a, i32 %b)
1280 %max = call i32 @llvm.smax.i32(i32 %a, i32 %b)
1281 %sub = sub i32 %min, %max
1285 define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
1286 ; RV32I-LABEL: abd_minmax_i64:
1288 ; RV32I-NEXT: beq a1, a3, .LBB16_2
1289 ; RV32I-NEXT: # %bb.1:
1290 ; RV32I-NEXT: slt a6, a1, a3
1291 ; RV32I-NEXT: j .LBB16_3
1292 ; RV32I-NEXT: .LBB16_2:
1293 ; RV32I-NEXT: sltu a6, a0, a2
1294 ; RV32I-NEXT: .LBB16_3:
1295 ; RV32I-NEXT: mv a4, a1
1296 ; RV32I-NEXT: mv a5, a0
1297 ; RV32I-NEXT: bnez a6, .LBB16_5
1298 ; RV32I-NEXT: # %bb.4:
1299 ; RV32I-NEXT: mv a4, a3
1300 ; RV32I-NEXT: mv a5, a2
1301 ; RV32I-NEXT: .LBB16_5:
1302 ; RV32I-NEXT: beq a1, a3, .LBB16_7
1303 ; RV32I-NEXT: # %bb.6:
1304 ; RV32I-NEXT: slt a6, a3, a1
1305 ; RV32I-NEXT: beqz a6, .LBB16_8
1306 ; RV32I-NEXT: j .LBB16_9
1307 ; RV32I-NEXT: .LBB16_7:
1308 ; RV32I-NEXT: sltu a6, a2, a0
1309 ; RV32I-NEXT: bnez a6, .LBB16_9
1310 ; RV32I-NEXT: .LBB16_8:
1311 ; RV32I-NEXT: mv a1, a3
1312 ; RV32I-NEXT: mv a0, a2
1313 ; RV32I-NEXT: .LBB16_9:
1314 ; RV32I-NEXT: sltu a2, a5, a0
1315 ; RV32I-NEXT: sub a1, a4, a1
1316 ; RV32I-NEXT: sub a1, a1, a2
1317 ; RV32I-NEXT: sub a0, a5, a0
1320 ; RV64I-LABEL: abd_minmax_i64:
1322 ; RV64I-NEXT: mv a2, a0
1323 ; RV64I-NEXT: bge a0, a1, .LBB16_3
1324 ; RV64I-NEXT: # %bb.1:
1325 ; RV64I-NEXT: bge a1, a0, .LBB16_4
1326 ; RV64I-NEXT: .LBB16_2:
1327 ; RV64I-NEXT: sub a0, a2, a0
1329 ; RV64I-NEXT: .LBB16_3:
1330 ; RV64I-NEXT: mv a2, a1
1331 ; RV64I-NEXT: blt a1, a0, .LBB16_2
1332 ; RV64I-NEXT: .LBB16_4:
1333 ; RV64I-NEXT: sub a0, a2, a1
1336 ; RV32ZBB-LABEL: abd_minmax_i64:
1338 ; RV32ZBB-NEXT: beq a1, a3, .LBB16_2
1339 ; RV32ZBB-NEXT: # %bb.1:
1340 ; RV32ZBB-NEXT: slt a6, a1, a3
1341 ; RV32ZBB-NEXT: j .LBB16_3
1342 ; RV32ZBB-NEXT: .LBB16_2:
1343 ; RV32ZBB-NEXT: sltu a6, a0, a2
1344 ; RV32ZBB-NEXT: .LBB16_3:
1345 ; RV32ZBB-NEXT: mv a4, a1
1346 ; RV32ZBB-NEXT: mv a5, a0
1347 ; RV32ZBB-NEXT: bnez a6, .LBB16_5
1348 ; RV32ZBB-NEXT: # %bb.4:
1349 ; RV32ZBB-NEXT: mv a4, a3
1350 ; RV32ZBB-NEXT: mv a5, a2
1351 ; RV32ZBB-NEXT: .LBB16_5:
1352 ; RV32ZBB-NEXT: beq a1, a3, .LBB16_7
1353 ; RV32ZBB-NEXT: # %bb.6:
1354 ; RV32ZBB-NEXT: slt a6, a3, a1
1355 ; RV32ZBB-NEXT: beqz a6, .LBB16_8
1356 ; RV32ZBB-NEXT: j .LBB16_9
1357 ; RV32ZBB-NEXT: .LBB16_7:
1358 ; RV32ZBB-NEXT: sltu a6, a2, a0
1359 ; RV32ZBB-NEXT: bnez a6, .LBB16_9
1360 ; RV32ZBB-NEXT: .LBB16_8:
1361 ; RV32ZBB-NEXT: mv a1, a3
1362 ; RV32ZBB-NEXT: mv a0, a2
1363 ; RV32ZBB-NEXT: .LBB16_9:
1364 ; RV32ZBB-NEXT: sltu a2, a5, a0
1365 ; RV32ZBB-NEXT: sub a1, a4, a1
1366 ; RV32ZBB-NEXT: sub a1, a1, a2
1367 ; RV32ZBB-NEXT: sub a0, a5, a0
1370 ; RV64ZBB-LABEL: abd_minmax_i64:
1372 ; RV64ZBB-NEXT: min a2, a0, a1
1373 ; RV64ZBB-NEXT: max a0, a0, a1
1374 ; RV64ZBB-NEXT: sub a0, a2, a0
1376 %min = call i64 @llvm.smin.i64(i64 %a, i64 %b)
1377 %max = call i64 @llvm.smax.i64(i64 %a, i64 %b)
1378 %sub = sub i64 %min, %max
1382 define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
1383 ; RV32I-LABEL: abd_minmax_i128:
1385 ; RV32I-NEXT: lw a6, 4(a2)
1386 ; RV32I-NEXT: lw a7, 8(a2)
1387 ; RV32I-NEXT: lw t0, 12(a2)
1388 ; RV32I-NEXT: lw a5, 12(a1)
1389 ; RV32I-NEXT: lw a3, 4(a1)
1390 ; RV32I-NEXT: lw a4, 8(a1)
1391 ; RV32I-NEXT: beq a5, t0, .LBB17_2
1392 ; RV32I-NEXT: # %bb.1:
1393 ; RV32I-NEXT: slt t1, a5, t0
1394 ; RV32I-NEXT: j .LBB17_3
1395 ; RV32I-NEXT: .LBB17_2:
1396 ; RV32I-NEXT: sltu t1, a4, a7
1397 ; RV32I-NEXT: .LBB17_3:
1398 ; RV32I-NEXT: lw t2, 0(a2)
1399 ; RV32I-NEXT: lw a1, 0(a1)
1400 ; RV32I-NEXT: beq a3, a6, .LBB17_5
1401 ; RV32I-NEXT: # %bb.4:
1402 ; RV32I-NEXT: sltu t6, a3, a6
1403 ; RV32I-NEXT: j .LBB17_6
1404 ; RV32I-NEXT: .LBB17_5:
1405 ; RV32I-NEXT: sltu t6, a1, t2
1406 ; RV32I-NEXT: .LBB17_6:
1407 ; RV32I-NEXT: xor a2, a5, t0
1408 ; RV32I-NEXT: xor t3, a4, a7
1409 ; RV32I-NEXT: or t5, t3, a2
1410 ; RV32I-NEXT: beqz t5, .LBB17_8
1411 ; RV32I-NEXT: # %bb.7:
1412 ; RV32I-NEXT: mv t6, t1
1413 ; RV32I-NEXT: .LBB17_8:
1414 ; RV32I-NEXT: mv a2, a1
1415 ; RV32I-NEXT: mv t1, a3
1416 ; RV32I-NEXT: mv t4, a5
1417 ; RV32I-NEXT: mv t3, a4
1418 ; RV32I-NEXT: bnez t6, .LBB17_10
1419 ; RV32I-NEXT: # %bb.9:
1420 ; RV32I-NEXT: mv a2, t2
1421 ; RV32I-NEXT: mv t1, a6
1422 ; RV32I-NEXT: mv t4, t0
1423 ; RV32I-NEXT: mv t3, a7
1424 ; RV32I-NEXT: .LBB17_10:
1425 ; RV32I-NEXT: beq a5, t0, .LBB17_12
1426 ; RV32I-NEXT: # %bb.11:
1427 ; RV32I-NEXT: slt t6, t0, a5
1428 ; RV32I-NEXT: j .LBB17_13
1429 ; RV32I-NEXT: .LBB17_12:
1430 ; RV32I-NEXT: sltu t6, a7, a4
1431 ; RV32I-NEXT: .LBB17_13:
1432 ; RV32I-NEXT: addi sp, sp, -16
1433 ; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
1434 ; RV32I-NEXT: beq a3, a6, .LBB17_15
1435 ; RV32I-NEXT: # %bb.14:
1436 ; RV32I-NEXT: sltu s0, a6, a3
1437 ; RV32I-NEXT: bnez t5, .LBB17_16
1438 ; RV32I-NEXT: j .LBB17_17
1439 ; RV32I-NEXT: .LBB17_15:
1440 ; RV32I-NEXT: sltu s0, t2, a1
1441 ; RV32I-NEXT: beqz t5, .LBB17_17
1442 ; RV32I-NEXT: .LBB17_16:
1443 ; RV32I-NEXT: mv s0, t6
1444 ; RV32I-NEXT: .LBB17_17:
1445 ; RV32I-NEXT: bnez s0, .LBB17_19
1446 ; RV32I-NEXT: # %bb.18:
1447 ; RV32I-NEXT: mv a1, t2
1448 ; RV32I-NEXT: mv a3, a6
1449 ; RV32I-NEXT: mv a5, t0
1450 ; RV32I-NEXT: mv a4, a7
1451 ; RV32I-NEXT: .LBB17_19:
1452 ; RV32I-NEXT: sltu a7, t3, a4
1453 ; RV32I-NEXT: sub a5, t4, a5
1454 ; RV32I-NEXT: sltu a6, a2, a1
1455 ; RV32I-NEXT: sub a5, a5, a7
1456 ; RV32I-NEXT: mv a7, a6
1457 ; RV32I-NEXT: beq t1, a3, .LBB17_21
1458 ; RV32I-NEXT: # %bb.20:
1459 ; RV32I-NEXT: sltu a7, t1, a3
1460 ; RV32I-NEXT: .LBB17_21:
1461 ; RV32I-NEXT: sub a4, t3, a4
1462 ; RV32I-NEXT: sub a3, t1, a3
1463 ; RV32I-NEXT: sub a2, a2, a1
1464 ; RV32I-NEXT: sltu a1, a4, a7
1465 ; RV32I-NEXT: sub a4, a4, a7
1466 ; RV32I-NEXT: sub a3, a3, a6
1467 ; RV32I-NEXT: sub a5, a5, a1
1468 ; RV32I-NEXT: sw a2, 0(a0)
1469 ; RV32I-NEXT: sw a3, 4(a0)
1470 ; RV32I-NEXT: sw a4, 8(a0)
1471 ; RV32I-NEXT: sw a5, 12(a0)
1472 ; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
1473 ; RV32I-NEXT: addi sp, sp, 16
1476 ; RV64I-LABEL: abd_minmax_i128:
1478 ; RV64I-NEXT: beq a1, a3, .LBB17_2
1479 ; RV64I-NEXT: # %bb.1:
1480 ; RV64I-NEXT: slt a6, a1, a3
1481 ; RV64I-NEXT: j .LBB17_3
1482 ; RV64I-NEXT: .LBB17_2:
1483 ; RV64I-NEXT: sltu a6, a0, a2
1484 ; RV64I-NEXT: .LBB17_3:
1485 ; RV64I-NEXT: mv a4, a1
1486 ; RV64I-NEXT: mv a5, a0
1487 ; RV64I-NEXT: bnez a6, .LBB17_5
1488 ; RV64I-NEXT: # %bb.4:
1489 ; RV64I-NEXT: mv a4, a3
1490 ; RV64I-NEXT: mv a5, a2
1491 ; RV64I-NEXT: .LBB17_5:
1492 ; RV64I-NEXT: beq a1, a3, .LBB17_7
1493 ; RV64I-NEXT: # %bb.6:
1494 ; RV64I-NEXT: slt a6, a3, a1
1495 ; RV64I-NEXT: beqz a6, .LBB17_8
1496 ; RV64I-NEXT: j .LBB17_9
1497 ; RV64I-NEXT: .LBB17_7:
1498 ; RV64I-NEXT: sltu a6, a2, a0
1499 ; RV64I-NEXT: bnez a6, .LBB17_9
1500 ; RV64I-NEXT: .LBB17_8:
1501 ; RV64I-NEXT: mv a1, a3
1502 ; RV64I-NEXT: mv a0, a2
1503 ; RV64I-NEXT: .LBB17_9:
1504 ; RV64I-NEXT: sltu a2, a5, a0
1505 ; RV64I-NEXT: sub a1, a4, a1
1506 ; RV64I-NEXT: sub a1, a1, a2
1507 ; RV64I-NEXT: sub a0, a5, a0
1510 ; RV32ZBB-LABEL: abd_minmax_i128:
1512 ; RV32ZBB-NEXT: lw a6, 4(a2)
1513 ; RV32ZBB-NEXT: lw a7, 8(a2)
1514 ; RV32ZBB-NEXT: lw t0, 12(a2)
1515 ; RV32ZBB-NEXT: lw a5, 12(a1)
1516 ; RV32ZBB-NEXT: lw a3, 4(a1)
1517 ; RV32ZBB-NEXT: lw a4, 8(a1)
1518 ; RV32ZBB-NEXT: beq a5, t0, .LBB17_2
1519 ; RV32ZBB-NEXT: # %bb.1:
1520 ; RV32ZBB-NEXT: slt t1, a5, t0
1521 ; RV32ZBB-NEXT: j .LBB17_3
1522 ; RV32ZBB-NEXT: .LBB17_2:
1523 ; RV32ZBB-NEXT: sltu t1, a4, a7
1524 ; RV32ZBB-NEXT: .LBB17_3:
1525 ; RV32ZBB-NEXT: lw t2, 0(a2)
1526 ; RV32ZBB-NEXT: lw a1, 0(a1)
1527 ; RV32ZBB-NEXT: beq a3, a6, .LBB17_5
1528 ; RV32ZBB-NEXT: # %bb.4:
1529 ; RV32ZBB-NEXT: sltu t6, a3, a6
1530 ; RV32ZBB-NEXT: j .LBB17_6
1531 ; RV32ZBB-NEXT: .LBB17_5:
1532 ; RV32ZBB-NEXT: sltu t6, a1, t2
1533 ; RV32ZBB-NEXT: .LBB17_6:
1534 ; RV32ZBB-NEXT: xor a2, a5, t0
1535 ; RV32ZBB-NEXT: xor t3, a4, a7
1536 ; RV32ZBB-NEXT: or t5, t3, a2
1537 ; RV32ZBB-NEXT: beqz t5, .LBB17_8
1538 ; RV32ZBB-NEXT: # %bb.7:
1539 ; RV32ZBB-NEXT: mv t6, t1
1540 ; RV32ZBB-NEXT: .LBB17_8:
1541 ; RV32ZBB-NEXT: mv a2, a1
1542 ; RV32ZBB-NEXT: mv t1, a3
1543 ; RV32ZBB-NEXT: mv t4, a5
1544 ; RV32ZBB-NEXT: mv t3, a4
1545 ; RV32ZBB-NEXT: bnez t6, .LBB17_10
1546 ; RV32ZBB-NEXT: # %bb.9:
1547 ; RV32ZBB-NEXT: mv a2, t2
1548 ; RV32ZBB-NEXT: mv t1, a6
1549 ; RV32ZBB-NEXT: mv t4, t0
1550 ; RV32ZBB-NEXT: mv t3, a7
1551 ; RV32ZBB-NEXT: .LBB17_10:
1552 ; RV32ZBB-NEXT: beq a5, t0, .LBB17_12
1553 ; RV32ZBB-NEXT: # %bb.11:
1554 ; RV32ZBB-NEXT: slt t6, t0, a5
1555 ; RV32ZBB-NEXT: j .LBB17_13
1556 ; RV32ZBB-NEXT: .LBB17_12:
1557 ; RV32ZBB-NEXT: sltu t6, a7, a4
1558 ; RV32ZBB-NEXT: .LBB17_13:
1559 ; RV32ZBB-NEXT: addi sp, sp, -16
1560 ; RV32ZBB-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
1561 ; RV32ZBB-NEXT: beq a3, a6, .LBB17_15
1562 ; RV32ZBB-NEXT: # %bb.14:
1563 ; RV32ZBB-NEXT: sltu s0, a6, a3
1564 ; RV32ZBB-NEXT: bnez t5, .LBB17_16
1565 ; RV32ZBB-NEXT: j .LBB17_17
1566 ; RV32ZBB-NEXT: .LBB17_15:
1567 ; RV32ZBB-NEXT: sltu s0, t2, a1
1568 ; RV32ZBB-NEXT: beqz t5, .LBB17_17
1569 ; RV32ZBB-NEXT: .LBB17_16:
1570 ; RV32ZBB-NEXT: mv s0, t6
1571 ; RV32ZBB-NEXT: .LBB17_17:
1572 ; RV32ZBB-NEXT: bnez s0, .LBB17_19
1573 ; RV32ZBB-NEXT: # %bb.18:
1574 ; RV32ZBB-NEXT: mv a1, t2
1575 ; RV32ZBB-NEXT: mv a3, a6
1576 ; RV32ZBB-NEXT: mv a5, t0
1577 ; RV32ZBB-NEXT: mv a4, a7
1578 ; RV32ZBB-NEXT: .LBB17_19:
1579 ; RV32ZBB-NEXT: sltu a7, t3, a4
1580 ; RV32ZBB-NEXT: sub a5, t4, a5
1581 ; RV32ZBB-NEXT: sltu a6, a2, a1
1582 ; RV32ZBB-NEXT: sub a5, a5, a7
1583 ; RV32ZBB-NEXT: mv a7, a6
1584 ; RV32ZBB-NEXT: beq t1, a3, .LBB17_21
1585 ; RV32ZBB-NEXT: # %bb.20:
1586 ; RV32ZBB-NEXT: sltu a7, t1, a3
1587 ; RV32ZBB-NEXT: .LBB17_21:
1588 ; RV32ZBB-NEXT: sub a4, t3, a4
1589 ; RV32ZBB-NEXT: sub a3, t1, a3
1590 ; RV32ZBB-NEXT: sub a2, a2, a1
1591 ; RV32ZBB-NEXT: sltu a1, a4, a7
1592 ; RV32ZBB-NEXT: sub a4, a4, a7
1593 ; RV32ZBB-NEXT: sub a3, a3, a6
1594 ; RV32ZBB-NEXT: sub a5, a5, a1
1595 ; RV32ZBB-NEXT: sw a2, 0(a0)
1596 ; RV32ZBB-NEXT: sw a3, 4(a0)
1597 ; RV32ZBB-NEXT: sw a4, 8(a0)
1598 ; RV32ZBB-NEXT: sw a5, 12(a0)
1599 ; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
1600 ; RV32ZBB-NEXT: addi sp, sp, 16
1603 ; RV64ZBB-LABEL: abd_minmax_i128:
1605 ; RV64ZBB-NEXT: beq a1, a3, .LBB17_2
1606 ; RV64ZBB-NEXT: # %bb.1:
1607 ; RV64ZBB-NEXT: slt a6, a1, a3
1608 ; RV64ZBB-NEXT: j .LBB17_3
1609 ; RV64ZBB-NEXT: .LBB17_2:
1610 ; RV64ZBB-NEXT: sltu a6, a0, a2
1611 ; RV64ZBB-NEXT: .LBB17_3:
1612 ; RV64ZBB-NEXT: mv a4, a1
1613 ; RV64ZBB-NEXT: mv a5, a0
1614 ; RV64ZBB-NEXT: bnez a6, .LBB17_5
1615 ; RV64ZBB-NEXT: # %bb.4:
1616 ; RV64ZBB-NEXT: mv a4, a3
1617 ; RV64ZBB-NEXT: mv a5, a2
1618 ; RV64ZBB-NEXT: .LBB17_5:
1619 ; RV64ZBB-NEXT: beq a1, a3, .LBB17_7
1620 ; RV64ZBB-NEXT: # %bb.6:
1621 ; RV64ZBB-NEXT: slt a6, a3, a1
1622 ; RV64ZBB-NEXT: beqz a6, .LBB17_8
1623 ; RV64ZBB-NEXT: j .LBB17_9
1624 ; RV64ZBB-NEXT: .LBB17_7:
1625 ; RV64ZBB-NEXT: sltu a6, a2, a0
1626 ; RV64ZBB-NEXT: bnez a6, .LBB17_9
1627 ; RV64ZBB-NEXT: .LBB17_8:
1628 ; RV64ZBB-NEXT: mv a1, a3
1629 ; RV64ZBB-NEXT: mv a0, a2
1630 ; RV64ZBB-NEXT: .LBB17_9:
1631 ; RV64ZBB-NEXT: sltu a2, a5, a0
1632 ; RV64ZBB-NEXT: sub a1, a4, a1
1633 ; RV64ZBB-NEXT: sub a1, a1, a2
1634 ; RV64ZBB-NEXT: sub a0, a5, a0
1636 %min = call i128 @llvm.smin.i128(i128 %a, i128 %b)
1637 %max = call i128 @llvm.smax.i128(i128 %a, i128 %b)
1638 %sub = sub i128 %min, %max
1643 ; select(icmp(a,b),sub(a,b),sub(b,a)) -> nabds(a,b)
1646 define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
1647 ; RV32I-LABEL: abd_cmp_i8:
1649 ; RV32I-NEXT: slli a2, a0, 24
1650 ; RV32I-NEXT: slli a3, a1, 24
1651 ; RV32I-NEXT: srai a2, a2, 24
1652 ; RV32I-NEXT: srai a3, a3, 24
1653 ; RV32I-NEXT: bge a3, a2, .LBB18_2
1654 ; RV32I-NEXT: # %bb.1:
1655 ; RV32I-NEXT: sub a0, a1, a0
1657 ; RV32I-NEXT: .LBB18_2:
1658 ; RV32I-NEXT: sub a0, a0, a1
1661 ; RV64I-LABEL: abd_cmp_i8:
1663 ; RV64I-NEXT: slli a2, a0, 56
1664 ; RV64I-NEXT: slli a3, a1, 56
1665 ; RV64I-NEXT: srai a2, a2, 56
1666 ; RV64I-NEXT: srai a3, a3, 56
1667 ; RV64I-NEXT: bge a3, a2, .LBB18_2
1668 ; RV64I-NEXT: # %bb.1:
1669 ; RV64I-NEXT: sub a0, a1, a0
1671 ; RV64I-NEXT: .LBB18_2:
1672 ; RV64I-NEXT: sub a0, a0, a1
1675 ; ZBB-LABEL: abd_cmp_i8:
1677 ; ZBB-NEXT: sext.b a2, a0
1678 ; ZBB-NEXT: sext.b a3, a1
1679 ; ZBB-NEXT: bge a3, a2, .LBB18_2
1680 ; ZBB-NEXT: # %bb.1:
1681 ; ZBB-NEXT: sub a0, a1, a0
1683 ; ZBB-NEXT: .LBB18_2:
1684 ; ZBB-NEXT: sub a0, a0, a1
1686 %cmp = icmp sle i8 %a, %b
1689 %sel = select i1 %cmp, i8 %ab, i8 %ba
1693 define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
1694 ; RV32I-LABEL: abd_cmp_i16:
1696 ; RV32I-NEXT: slli a2, a1, 16
1697 ; RV32I-NEXT: slli a3, a0, 16
1698 ; RV32I-NEXT: srai a2, a2, 16
1699 ; RV32I-NEXT: srai a3, a3, 16
1700 ; RV32I-NEXT: blt a3, a2, .LBB19_2
1701 ; RV32I-NEXT: # %bb.1:
1702 ; RV32I-NEXT: sub a0, a1, a0
1704 ; RV32I-NEXT: .LBB19_2:
1705 ; RV32I-NEXT: sub a0, a0, a1
1708 ; RV64I-LABEL: abd_cmp_i16:
1710 ; RV64I-NEXT: slli a2, a1, 48
1711 ; RV64I-NEXT: slli a3, a0, 48
1712 ; RV64I-NEXT: srai a2, a2, 48
1713 ; RV64I-NEXT: srai a3, a3, 48
1714 ; RV64I-NEXT: blt a3, a2, .LBB19_2
1715 ; RV64I-NEXT: # %bb.1:
1716 ; RV64I-NEXT: sub a0, a1, a0
1718 ; RV64I-NEXT: .LBB19_2:
1719 ; RV64I-NEXT: sub a0, a0, a1
1722 ; ZBB-LABEL: abd_cmp_i16:
1724 ; ZBB-NEXT: sext.h a2, a1
1725 ; ZBB-NEXT: sext.h a3, a0
1726 ; ZBB-NEXT: blt a3, a2, .LBB19_2
1727 ; ZBB-NEXT: # %bb.1:
1728 ; ZBB-NEXT: sub a0, a1, a0
1730 ; ZBB-NEXT: .LBB19_2:
1731 ; ZBB-NEXT: sub a0, a0, a1
1733 %cmp = icmp slt i16 %a, %b
1734 %ab = sub i16 %a, %b
1735 %ba = sub i16 %b, %a
1736 %sel = select i1 %cmp, i16 %ab, i16 %ba
1740 define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
1741 ; RV32I-LABEL: abd_cmp_i32:
1743 ; RV32I-NEXT: bge a0, a1, .LBB20_2
1744 ; RV32I-NEXT: # %bb.1:
1745 ; RV32I-NEXT: sub a0, a0, a1
1747 ; RV32I-NEXT: .LBB20_2:
1748 ; RV32I-NEXT: sub a0, a1, a0
1751 ; RV64I-LABEL: abd_cmp_i32:
1753 ; RV64I-NEXT: sext.w a2, a1
1754 ; RV64I-NEXT: sext.w a3, a0
1755 ; RV64I-NEXT: bge a3, a2, .LBB20_2
1756 ; RV64I-NEXT: # %bb.1:
1757 ; RV64I-NEXT: subw a0, a0, a1
1759 ; RV64I-NEXT: .LBB20_2:
1760 ; RV64I-NEXT: subw a0, a1, a0
1763 ; RV32ZBB-LABEL: abd_cmp_i32:
1765 ; RV32ZBB-NEXT: bge a0, a1, .LBB20_2
1766 ; RV32ZBB-NEXT: # %bb.1:
1767 ; RV32ZBB-NEXT: sub a0, a0, a1
1769 ; RV32ZBB-NEXT: .LBB20_2:
1770 ; RV32ZBB-NEXT: sub a0, a1, a0
1773 ; RV64ZBB-LABEL: abd_cmp_i32:
1775 ; RV64ZBB-NEXT: sext.w a2, a1
1776 ; RV64ZBB-NEXT: sext.w a3, a0
1777 ; RV64ZBB-NEXT: bge a3, a2, .LBB20_2
1778 ; RV64ZBB-NEXT: # %bb.1:
1779 ; RV64ZBB-NEXT: subw a0, a0, a1
1781 ; RV64ZBB-NEXT: .LBB20_2:
1782 ; RV64ZBB-NEXT: subw a0, a1, a0
1784 %cmp = icmp sge i32 %a, %b
1785 %ab = sub i32 %a, %b
1786 %ba = sub i32 %b, %a
1787 %sel = select i1 %cmp, i32 %ba, i32 %ab
1791 define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
1792 ; RV32I-LABEL: abd_cmp_i64:
1794 ; RV32I-NEXT: sltu a4, a0, a2
1795 ; RV32I-NEXT: mv a5, a4
1796 ; RV32I-NEXT: beq a1, a3, .LBB21_2
1797 ; RV32I-NEXT: # %bb.1:
1798 ; RV32I-NEXT: slt a5, a1, a3
1799 ; RV32I-NEXT: .LBB21_2:
1800 ; RV32I-NEXT: bnez a5, .LBB21_4
1801 ; RV32I-NEXT: # %bb.3:
1802 ; RV32I-NEXT: sltu a4, a2, a0
1803 ; RV32I-NEXT: sub a1, a3, a1
1804 ; RV32I-NEXT: sub a1, a1, a4
1805 ; RV32I-NEXT: sub a0, a2, a0
1807 ; RV32I-NEXT: .LBB21_4:
1808 ; RV32I-NEXT: sub a1, a1, a3
1809 ; RV32I-NEXT: sub a1, a1, a4
1810 ; RV32I-NEXT: sub a0, a0, a2
1813 ; RV64I-LABEL: abd_cmp_i64:
1815 ; RV64I-NEXT: blt a0, a1, .LBB21_2
1816 ; RV64I-NEXT: # %bb.1:
1817 ; RV64I-NEXT: sub a0, a1, a0
1819 ; RV64I-NEXT: .LBB21_2:
1820 ; RV64I-NEXT: sub a0, a0, a1
1823 ; RV32ZBB-LABEL: abd_cmp_i64:
1825 ; RV32ZBB-NEXT: sltu a4, a0, a2
1826 ; RV32ZBB-NEXT: mv a5, a4
1827 ; RV32ZBB-NEXT: beq a1, a3, .LBB21_2
1828 ; RV32ZBB-NEXT: # %bb.1:
1829 ; RV32ZBB-NEXT: slt a5, a1, a3
1830 ; RV32ZBB-NEXT: .LBB21_2:
1831 ; RV32ZBB-NEXT: bnez a5, .LBB21_4
1832 ; RV32ZBB-NEXT: # %bb.3:
1833 ; RV32ZBB-NEXT: sltu a4, a2, a0
1834 ; RV32ZBB-NEXT: sub a1, a3, a1
1835 ; RV32ZBB-NEXT: sub a1, a1, a4
1836 ; RV32ZBB-NEXT: sub a0, a2, a0
1838 ; RV32ZBB-NEXT: .LBB21_4:
1839 ; RV32ZBB-NEXT: sub a1, a1, a3
1840 ; RV32ZBB-NEXT: sub a1, a1, a4
1841 ; RV32ZBB-NEXT: sub a0, a0, a2
1844 ; RV64ZBB-LABEL: abd_cmp_i64:
1846 ; RV64ZBB-NEXT: blt a0, a1, .LBB21_2
1847 ; RV64ZBB-NEXT: # %bb.1:
1848 ; RV64ZBB-NEXT: sub a0, a1, a0
1850 ; RV64ZBB-NEXT: .LBB21_2:
1851 ; RV64ZBB-NEXT: sub a0, a0, a1
1853 %cmp = icmp slt i64 %a, %b
1854 %ab = sub i64 %a, %b
1855 %ba = sub i64 %b, %a
1856 %sel = select i1 %cmp, i64 %ab, i64 %ba
1860 define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
1861 ; RV32I-LABEL: abd_cmp_i128:
1863 ; RV32I-NEXT: lw a3, 0(a2)
1864 ; RV32I-NEXT: lw a4, 4(a2)
1865 ; RV32I-NEXT: lw a5, 8(a2)
1866 ; RV32I-NEXT: lw a7, 12(a2)
1867 ; RV32I-NEXT: lw a6, 8(a1)
1868 ; RV32I-NEXT: lw t0, 12(a1)
1869 ; RV32I-NEXT: lw a2, 0(a1)
1870 ; RV32I-NEXT: lw a1, 4(a1)
1871 ; RV32I-NEXT: sltu t1, a6, a5
1872 ; RV32I-NEXT: mv t4, t1
1873 ; RV32I-NEXT: beq t0, a7, .LBB22_2
1874 ; RV32I-NEXT: # %bb.1:
1875 ; RV32I-NEXT: slt t4, t0, a7
1876 ; RV32I-NEXT: .LBB22_2:
1877 ; RV32I-NEXT: sltu t2, a2, a3
1878 ; RV32I-NEXT: mv t3, t2
1879 ; RV32I-NEXT: beq a1, a4, .LBB22_4
1880 ; RV32I-NEXT: # %bb.3:
1881 ; RV32I-NEXT: sltu t3, a1, a4
1882 ; RV32I-NEXT: .LBB22_4:
1883 ; RV32I-NEXT: xor t5, t0, a7
1884 ; RV32I-NEXT: xor t6, a6, a5
1885 ; RV32I-NEXT: or t5, t6, t5
1886 ; RV32I-NEXT: mv t6, t3
1887 ; RV32I-NEXT: beqz t5, .LBB22_6
1888 ; RV32I-NEXT: # %bb.5:
1889 ; RV32I-NEXT: mv t6, t4
1890 ; RV32I-NEXT: .LBB22_6:
1891 ; RV32I-NEXT: sltu t4, a3, a2
1892 ; RV32I-NEXT: mv t5, t4
1893 ; RV32I-NEXT: beq a1, a4, .LBB22_8
1894 ; RV32I-NEXT: # %bb.7:
1895 ; RV32I-NEXT: sltu t5, a4, a1
1896 ; RV32I-NEXT: .LBB22_8:
1897 ; RV32I-NEXT: bnez t6, .LBB22_10
1898 ; RV32I-NEXT: # %bb.9:
1899 ; RV32I-NEXT: sltu t1, a5, a6
1900 ; RV32I-NEXT: sub a7, a7, t0
1901 ; RV32I-NEXT: sub a5, a5, a6
1902 ; RV32I-NEXT: sub a4, a4, a1
1903 ; RV32I-NEXT: sub a6, a7, t1
1904 ; RV32I-NEXT: sltu a7, a5, t5
1905 ; RV32I-NEXT: sub a1, a5, t5
1906 ; RV32I-NEXT: sub a5, a4, t4
1907 ; RV32I-NEXT: sub a4, a6, a7
1908 ; RV32I-NEXT: sub a2, a3, a2
1909 ; RV32I-NEXT: j .LBB22_11
1910 ; RV32I-NEXT: .LBB22_10:
1911 ; RV32I-NEXT: sub a7, t0, a7
1912 ; RV32I-NEXT: sub a5, a6, a5
1913 ; RV32I-NEXT: sub a4, a1, a4
1914 ; RV32I-NEXT: sub a6, a7, t1
1915 ; RV32I-NEXT: sltu a7, a5, t3
1916 ; RV32I-NEXT: sub a1, a5, t3
1917 ; RV32I-NEXT: sub a5, a4, t2
1918 ; RV32I-NEXT: sub a4, a6, a7
1919 ; RV32I-NEXT: sub a2, a2, a3
1920 ; RV32I-NEXT: .LBB22_11:
1921 ; RV32I-NEXT: sw a2, 0(a0)
1922 ; RV32I-NEXT: sw a5, 4(a0)
1923 ; RV32I-NEXT: sw a1, 8(a0)
1924 ; RV32I-NEXT: sw a4, 12(a0)
1927 ; RV64I-LABEL: abd_cmp_i128:
1929 ; RV64I-NEXT: sltu a4, a0, a2
1930 ; RV64I-NEXT: mv a5, a4
1931 ; RV64I-NEXT: beq a1, a3, .LBB22_2
1932 ; RV64I-NEXT: # %bb.1:
1933 ; RV64I-NEXT: slt a5, a1, a3
1934 ; RV64I-NEXT: .LBB22_2:
1935 ; RV64I-NEXT: bnez a5, .LBB22_4
1936 ; RV64I-NEXT: # %bb.3:
1937 ; RV64I-NEXT: sltu a4, a2, a0
1938 ; RV64I-NEXT: sub a1, a3, a1
1939 ; RV64I-NEXT: sub a1, a1, a4
1940 ; RV64I-NEXT: sub a0, a2, a0
1942 ; RV64I-NEXT: .LBB22_4:
1943 ; RV64I-NEXT: sub a1, a1, a3
1944 ; RV64I-NEXT: sub a1, a1, a4
1945 ; RV64I-NEXT: sub a0, a0, a2
1948 ; RV32ZBB-LABEL: abd_cmp_i128:
1950 ; RV32ZBB-NEXT: lw a3, 0(a2)
1951 ; RV32ZBB-NEXT: lw a4, 4(a2)
1952 ; RV32ZBB-NEXT: lw a5, 8(a2)
1953 ; RV32ZBB-NEXT: lw a7, 12(a2)
1954 ; RV32ZBB-NEXT: lw a6, 8(a1)
1955 ; RV32ZBB-NEXT: lw t0, 12(a1)
1956 ; RV32ZBB-NEXT: lw a2, 0(a1)
1957 ; RV32ZBB-NEXT: lw a1, 4(a1)
1958 ; RV32ZBB-NEXT: sltu t1, a6, a5
1959 ; RV32ZBB-NEXT: mv t4, t1
1960 ; RV32ZBB-NEXT: beq t0, a7, .LBB22_2
1961 ; RV32ZBB-NEXT: # %bb.1:
1962 ; RV32ZBB-NEXT: slt t4, t0, a7
1963 ; RV32ZBB-NEXT: .LBB22_2:
1964 ; RV32ZBB-NEXT: sltu t2, a2, a3
1965 ; RV32ZBB-NEXT: mv t3, t2
1966 ; RV32ZBB-NEXT: beq a1, a4, .LBB22_4
1967 ; RV32ZBB-NEXT: # %bb.3:
1968 ; RV32ZBB-NEXT: sltu t3, a1, a4
1969 ; RV32ZBB-NEXT: .LBB22_4:
1970 ; RV32ZBB-NEXT: xor t5, t0, a7
1971 ; RV32ZBB-NEXT: xor t6, a6, a5
1972 ; RV32ZBB-NEXT: or t5, t6, t5
1973 ; RV32ZBB-NEXT: mv t6, t3
1974 ; RV32ZBB-NEXT: beqz t5, .LBB22_6
1975 ; RV32ZBB-NEXT: # %bb.5:
1976 ; RV32ZBB-NEXT: mv t6, t4
1977 ; RV32ZBB-NEXT: .LBB22_6:
1978 ; RV32ZBB-NEXT: sltu t4, a3, a2
1979 ; RV32ZBB-NEXT: mv t5, t4
1980 ; RV32ZBB-NEXT: beq a1, a4, .LBB22_8
1981 ; RV32ZBB-NEXT: # %bb.7:
1982 ; RV32ZBB-NEXT: sltu t5, a4, a1
1983 ; RV32ZBB-NEXT: .LBB22_8:
1984 ; RV32ZBB-NEXT: bnez t6, .LBB22_10
1985 ; RV32ZBB-NEXT: # %bb.9:
1986 ; RV32ZBB-NEXT: sltu t1, a5, a6
1987 ; RV32ZBB-NEXT: sub a7, a7, t0
1988 ; RV32ZBB-NEXT: sub a5, a5, a6
1989 ; RV32ZBB-NEXT: sub a4, a4, a1
1990 ; RV32ZBB-NEXT: sub a6, a7, t1
1991 ; RV32ZBB-NEXT: sltu a7, a5, t5
1992 ; RV32ZBB-NEXT: sub a1, a5, t5
1993 ; RV32ZBB-NEXT: sub a5, a4, t4
1994 ; RV32ZBB-NEXT: sub a4, a6, a7
1995 ; RV32ZBB-NEXT: sub a2, a3, a2
1996 ; RV32ZBB-NEXT: j .LBB22_11
1997 ; RV32ZBB-NEXT: .LBB22_10:
1998 ; RV32ZBB-NEXT: sub a7, t0, a7
1999 ; RV32ZBB-NEXT: sub a5, a6, a5
2000 ; RV32ZBB-NEXT: sub a4, a1, a4
2001 ; RV32ZBB-NEXT: sub a6, a7, t1
2002 ; RV32ZBB-NEXT: sltu a7, a5, t3
2003 ; RV32ZBB-NEXT: sub a1, a5, t3
2004 ; RV32ZBB-NEXT: sub a5, a4, t2
2005 ; RV32ZBB-NEXT: sub a4, a6, a7
2006 ; RV32ZBB-NEXT: sub a2, a2, a3
2007 ; RV32ZBB-NEXT: .LBB22_11:
2008 ; RV32ZBB-NEXT: sw a2, 0(a0)
2009 ; RV32ZBB-NEXT: sw a5, 4(a0)
2010 ; RV32ZBB-NEXT: sw a1, 8(a0)
2011 ; RV32ZBB-NEXT: sw a4, 12(a0)
2014 ; RV64ZBB-LABEL: abd_cmp_i128:
2016 ; RV64ZBB-NEXT: sltu a4, a0, a2
2017 ; RV64ZBB-NEXT: mv a5, a4
2018 ; RV64ZBB-NEXT: beq a1, a3, .LBB22_2
2019 ; RV64ZBB-NEXT: # %bb.1:
2020 ; RV64ZBB-NEXT: slt a5, a1, a3
2021 ; RV64ZBB-NEXT: .LBB22_2:
2022 ; RV64ZBB-NEXT: bnez a5, .LBB22_4
2023 ; RV64ZBB-NEXT: # %bb.3:
2024 ; RV64ZBB-NEXT: sltu a4, a2, a0
2025 ; RV64ZBB-NEXT: sub a1, a3, a1
2026 ; RV64ZBB-NEXT: sub a1, a1, a4
2027 ; RV64ZBB-NEXT: sub a0, a2, a0
2029 ; RV64ZBB-NEXT: .LBB22_4:
2030 ; RV64ZBB-NEXT: sub a1, a1, a3
2031 ; RV64ZBB-NEXT: sub a1, a1, a4
2032 ; RV64ZBB-NEXT: sub a0, a0, a2
2034 %cmp = icmp slt i128 %a, %b
2035 %ab = sub i128 %a, %b
2036 %ba = sub i128 %b, %a
2037 %sel = select i1 %cmp, i128 %ab, i128 %ba
2042 ; nabs(sub_nsw(x, y)) -> nabds(a,b)
2045 define i8 @abd_subnsw_i8(i8 %a, i8 %b) nounwind {
2046 ; RV32I-LABEL: abd_subnsw_i8:
2048 ; RV32I-NEXT: sub a0, a0, a1
2049 ; RV32I-NEXT: slli a1, a0, 24
2050 ; RV32I-NEXT: srai a1, a1, 31
2051 ; RV32I-NEXT: xor a0, a0, a1
2052 ; RV32I-NEXT: sub a0, a1, a0
2055 ; RV64I-LABEL: abd_subnsw_i8:
2057 ; RV64I-NEXT: sub a0, a0, a1
2058 ; RV64I-NEXT: slli a1, a0, 56
2059 ; RV64I-NEXT: srai a1, a1, 63
2060 ; RV64I-NEXT: xor a0, a0, a1
2061 ; RV64I-NEXT: sub a0, a1, a0
2064 ; RV32ZBB-LABEL: abd_subnsw_i8:
2066 ; RV32ZBB-NEXT: sub a0, a0, a1
2067 ; RV32ZBB-NEXT: slli a1, a0, 24
2068 ; RV32ZBB-NEXT: srai a1, a1, 31
2069 ; RV32ZBB-NEXT: xor a0, a0, a1
2070 ; RV32ZBB-NEXT: sub a0, a1, a0
2073 ; RV64ZBB-LABEL: abd_subnsw_i8:
2075 ; RV64ZBB-NEXT: sub a0, a0, a1
2076 ; RV64ZBB-NEXT: slli a1, a0, 56
2077 ; RV64ZBB-NEXT: srai a1, a1, 63
2078 ; RV64ZBB-NEXT: xor a0, a0, a1
2079 ; RV64ZBB-NEXT: sub a0, a1, a0
2081 %sub = sub nsw i8 %a, %b
2082 %abs = call i8 @llvm.abs.i8(i8 %sub, i1 false)
2083 %nabs = sub i8 0, %abs
2087 define i8 @abd_subnsw_i8_undef(i8 %a, i8 %b) nounwind {
2088 ; RV32I-LABEL: abd_subnsw_i8_undef:
2090 ; RV32I-NEXT: sub a0, a0, a1
2091 ; RV32I-NEXT: slli a1, a0, 24
2092 ; RV32I-NEXT: srai a1, a1, 31
2093 ; RV32I-NEXT: xor a0, a0, a1
2094 ; RV32I-NEXT: sub a0, a1, a0
2097 ; RV64I-LABEL: abd_subnsw_i8_undef:
2099 ; RV64I-NEXT: sub a0, a0, a1
2100 ; RV64I-NEXT: slli a1, a0, 56
2101 ; RV64I-NEXT: srai a1, a1, 63
2102 ; RV64I-NEXT: xor a0, a0, a1
2103 ; RV64I-NEXT: sub a0, a1, a0
2106 ; RV32ZBB-LABEL: abd_subnsw_i8_undef:
2108 ; RV32ZBB-NEXT: sub a0, a0, a1
2109 ; RV32ZBB-NEXT: slli a1, a0, 24
2110 ; RV32ZBB-NEXT: srai a1, a1, 31
2111 ; RV32ZBB-NEXT: xor a0, a0, a1
2112 ; RV32ZBB-NEXT: sub a0, a1, a0
2115 ; RV64ZBB-LABEL: abd_subnsw_i8_undef:
2117 ; RV64ZBB-NEXT: sub a0, a0, a1
2118 ; RV64ZBB-NEXT: slli a1, a0, 56
2119 ; RV64ZBB-NEXT: srai a1, a1, 63
2120 ; RV64ZBB-NEXT: xor a0, a0, a1
2121 ; RV64ZBB-NEXT: sub a0, a1, a0
2123 %sub = sub nsw i8 %a, %b
2124 %abs = call i8 @llvm.abs.i8(i8 %sub, i1 true)
2125 %nabs = sub i8 0, %abs
2129 define i16 @abd_subnsw_i16(i16 %a, i16 %b) nounwind {
2130 ; RV32I-LABEL: abd_subnsw_i16:
2132 ; RV32I-NEXT: sub a0, a0, a1
2133 ; RV32I-NEXT: slli a1, a0, 16
2134 ; RV32I-NEXT: srai a1, a1, 31
2135 ; RV32I-NEXT: xor a0, a0, a1
2136 ; RV32I-NEXT: sub a0, a1, a0
2139 ; RV64I-LABEL: abd_subnsw_i16:
2141 ; RV64I-NEXT: sub a0, a0, a1
2142 ; RV64I-NEXT: slli a1, a0, 48
2143 ; RV64I-NEXT: srai a1, a1, 63
2144 ; RV64I-NEXT: xor a0, a0, a1
2145 ; RV64I-NEXT: sub a0, a1, a0
2148 ; RV32ZBB-LABEL: abd_subnsw_i16:
2150 ; RV32ZBB-NEXT: sub a0, a0, a1
2151 ; RV32ZBB-NEXT: slli a1, a0, 16
2152 ; RV32ZBB-NEXT: srai a1, a1, 31
2153 ; RV32ZBB-NEXT: xor a0, a0, a1
2154 ; RV32ZBB-NEXT: sub a0, a1, a0
2157 ; RV64ZBB-LABEL: abd_subnsw_i16:
2159 ; RV64ZBB-NEXT: sub a0, a0, a1
2160 ; RV64ZBB-NEXT: slli a1, a0, 48
2161 ; RV64ZBB-NEXT: srai a1, a1, 63
2162 ; RV64ZBB-NEXT: xor a0, a0, a1
2163 ; RV64ZBB-NEXT: sub a0, a1, a0
2165 %sub = sub nsw i16 %a, %b
2166 %abs = call i16 @llvm.abs.i16(i16 %sub, i1 false)
2167 %nabs = sub i16 0, %abs
2171 define i16 @abd_subnsw_i16_undef(i16 %a, i16 %b) nounwind {
2172 ; RV32I-LABEL: abd_subnsw_i16_undef:
2174 ; RV32I-NEXT: sub a0, a0, a1
2175 ; RV32I-NEXT: slli a1, a0, 16
2176 ; RV32I-NEXT: srai a1, a1, 31
2177 ; RV32I-NEXT: xor a0, a0, a1
2178 ; RV32I-NEXT: sub a0, a1, a0
2181 ; RV64I-LABEL: abd_subnsw_i16_undef:
2183 ; RV64I-NEXT: sub a0, a0, a1
2184 ; RV64I-NEXT: slli a1, a0, 48
2185 ; RV64I-NEXT: srai a1, a1, 63
2186 ; RV64I-NEXT: xor a0, a0, a1
2187 ; RV64I-NEXT: sub a0, a1, a0
2190 ; RV32ZBB-LABEL: abd_subnsw_i16_undef:
2192 ; RV32ZBB-NEXT: sub a0, a0, a1
2193 ; RV32ZBB-NEXT: slli a1, a0, 16
2194 ; RV32ZBB-NEXT: srai a1, a1, 31
2195 ; RV32ZBB-NEXT: xor a0, a0, a1
2196 ; RV32ZBB-NEXT: sub a0, a1, a0
2199 ; RV64ZBB-LABEL: abd_subnsw_i16_undef:
2201 ; RV64ZBB-NEXT: sub a0, a0, a1
2202 ; RV64ZBB-NEXT: slli a1, a0, 48
2203 ; RV64ZBB-NEXT: srai a1, a1, 63
2204 ; RV64ZBB-NEXT: xor a0, a0, a1
2205 ; RV64ZBB-NEXT: sub a0, a1, a0
2207 %sub = sub nsw i16 %a, %b
2208 %abs = call i16 @llvm.abs.i16(i16 %sub, i1 true)
2209 %nabs = sub i16 0, %abs
2213 define i32 @abd_subnsw_i32(i32 %a, i32 %b) nounwind {
2214 ; RV32I-LABEL: abd_subnsw_i32:
2216 ; RV32I-NEXT: sub a0, a0, a1
2217 ; RV32I-NEXT: srai a1, a0, 31
2218 ; RV32I-NEXT: xor a0, a0, a1
2219 ; RV32I-NEXT: sub a0, a1, a0
2222 ; RV64I-LABEL: abd_subnsw_i32:
2224 ; RV64I-NEXT: subw a0, a0, a1
2225 ; RV64I-NEXT: sraiw a1, a0, 31
2226 ; RV64I-NEXT: xor a0, a0, a1
2227 ; RV64I-NEXT: subw a0, a1, a0
2230 ; RV32ZBB-LABEL: abd_subnsw_i32:
2232 ; RV32ZBB-NEXT: sub a0, a0, a1
2233 ; RV32ZBB-NEXT: neg a1, a0
2234 ; RV32ZBB-NEXT: min a0, a0, a1
2237 ; RV64ZBB-LABEL: abd_subnsw_i32:
2239 ; RV64ZBB-NEXT: subw a0, a0, a1
2240 ; RV64ZBB-NEXT: sraiw a1, a0, 31
2241 ; RV64ZBB-NEXT: xor a0, a0, a1
2242 ; RV64ZBB-NEXT: subw a0, a1, a0
2244 %sub = sub nsw i32 %a, %b
2245 %abs = call i32 @llvm.abs.i32(i32 %sub, i1 false)
2246 %nabs = sub i32 0, %abs
2250 define i32 @abd_subnsw_i32_undef(i32 %a, i32 %b) nounwind {
2251 ; RV32I-LABEL: abd_subnsw_i32_undef:
2253 ; RV32I-NEXT: sub a0, a0, a1
2254 ; RV32I-NEXT: srai a1, a0, 31
2255 ; RV32I-NEXT: xor a0, a0, a1
2256 ; RV32I-NEXT: sub a0, a1, a0
2259 ; RV64I-LABEL: abd_subnsw_i32_undef:
2261 ; RV64I-NEXT: subw a0, a0, a1
2262 ; RV64I-NEXT: sraiw a1, a0, 31
2263 ; RV64I-NEXT: xor a0, a0, a1
2264 ; RV64I-NEXT: subw a0, a1, a0
2267 ; RV32ZBB-LABEL: abd_subnsw_i32_undef:
2269 ; RV32ZBB-NEXT: sub a0, a0, a1
2270 ; RV32ZBB-NEXT: neg a1, a0
2271 ; RV32ZBB-NEXT: min a0, a0, a1
2274 ; RV64ZBB-LABEL: abd_subnsw_i32_undef:
2276 ; RV64ZBB-NEXT: subw a0, a0, a1
2277 ; RV64ZBB-NEXT: sraiw a1, a0, 31
2278 ; RV64ZBB-NEXT: xor a0, a0, a1
2279 ; RV64ZBB-NEXT: subw a0, a1, a0
2281 %sub = sub nsw i32 %a, %b
2282 %abs = call i32 @llvm.abs.i32(i32 %sub, i1 true)
2283 %nabs = sub i32 0, %abs
; Negated absolute difference of two i64 values (llvm.abs with i1 false:
; INT_MIN is NOT poison). On RV32 the i64 is split across register pairs
; (a0/a1 and a2/a3), so the sub and the negate are carried through with
; sltu-generated borrows. RV64ZBB folds nabs into neg+min on the native
; 64-bit registers.
2287 define i64 @abd_subnsw_i64(i64 %a, i64 %b) nounwind {
2288 ; RV32I-LABEL: abd_subnsw_i64:
2290 ; RV32I-NEXT: sltu a4, a0, a2
2291 ; RV32I-NEXT: sub a1, a1, a3
2292 ; RV32I-NEXT: sub a0, a0, a2
2293 ; RV32I-NEXT: sub a1, a1, a4
2294 ; RV32I-NEXT: srai a2, a1, 31
2295 ; RV32I-NEXT: xor a0, a0, a2
2296 ; RV32I-NEXT: xor a1, a1, a2
2297 ; RV32I-NEXT: sltu a3, a2, a0
2298 ; RV32I-NEXT: sub a1, a2, a1
2299 ; RV32I-NEXT: sub a1, a1, a3
2300 ; RV32I-NEXT: sub a0, a2, a0
2303 ; RV64I-LABEL: abd_subnsw_i64:
2305 ; RV64I-NEXT: sub a0, a0, a1
2306 ; RV64I-NEXT: srai a1, a0, 63
2307 ; RV64I-NEXT: xor a0, a0, a1
2308 ; RV64I-NEXT: sub a0, a1, a0
2311 ; RV32ZBB-LABEL: abd_subnsw_i64:
2313 ; RV32ZBB-NEXT: sltu a4, a0, a2
2314 ; RV32ZBB-NEXT: sub a1, a1, a3
2315 ; RV32ZBB-NEXT: sub a0, a0, a2
2316 ; RV32ZBB-NEXT: sub a1, a1, a4
2317 ; RV32ZBB-NEXT: srai a2, a1, 31
2318 ; RV32ZBB-NEXT: xor a0, a0, a2
2319 ; RV32ZBB-NEXT: xor a1, a1, a2
2320 ; RV32ZBB-NEXT: sltu a3, a2, a0
2321 ; RV32ZBB-NEXT: sub a1, a2, a1
2322 ; RV32ZBB-NEXT: sub a1, a1, a3
2323 ; RV32ZBB-NEXT: sub a0, a2, a0
2326 ; RV64ZBB-LABEL: abd_subnsw_i64:
2328 ; RV64ZBB-NEXT: sub a0, a0, a1
2329 ; RV64ZBB-NEXT: neg a1, a0
2330 ; RV64ZBB-NEXT: min a0, a0, a1
2332 %sub = sub nsw i64 %a, %b
2333 %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
2334 %nabs = sub i64 0, %abs
; Same as abd_subnsw_i64 but with the llvm.abs poison flag set (i1 true):
; abs(INT64_MIN) is poison. Expected codegen matches the non-undef variant
; on all four configurations.
2338 define i64 @abd_subnsw_i64_undef(i64 %a, i64 %b) nounwind {
2339 ; RV32I-LABEL: abd_subnsw_i64_undef:
2341 ; RV32I-NEXT: sltu a4, a0, a2
2342 ; RV32I-NEXT: sub a1, a1, a3
2343 ; RV32I-NEXT: sub a0, a0, a2
2344 ; RV32I-NEXT: sub a1, a1, a4
2345 ; RV32I-NEXT: srai a2, a1, 31
2346 ; RV32I-NEXT: xor a0, a0, a2
2347 ; RV32I-NEXT: xor a1, a1, a2
2348 ; RV32I-NEXT: sltu a3, a2, a0
2349 ; RV32I-NEXT: sub a1, a2, a1
2350 ; RV32I-NEXT: sub a1, a1, a3
2351 ; RV32I-NEXT: sub a0, a2, a0
2354 ; RV64I-LABEL: abd_subnsw_i64_undef:
2356 ; RV64I-NEXT: sub a0, a0, a1
2357 ; RV64I-NEXT: srai a1, a0, 63
2358 ; RV64I-NEXT: xor a0, a0, a1
2359 ; RV64I-NEXT: sub a0, a1, a0
2362 ; RV32ZBB-LABEL: abd_subnsw_i64_undef:
2364 ; RV32ZBB-NEXT: sltu a4, a0, a2
2365 ; RV32ZBB-NEXT: sub a1, a1, a3
2366 ; RV32ZBB-NEXT: sub a0, a0, a2
2367 ; RV32ZBB-NEXT: sub a1, a1, a4
2368 ; RV32ZBB-NEXT: srai a2, a1, 31
2369 ; RV32ZBB-NEXT: xor a0, a0, a2
2370 ; RV32ZBB-NEXT: xor a1, a1, a2
2371 ; RV32ZBB-NEXT: sltu a3, a2, a0
2372 ; RV32ZBB-NEXT: sub a1, a2, a1
2373 ; RV32ZBB-NEXT: sub a1, a1, a3
2374 ; RV32ZBB-NEXT: sub a0, a2, a0
2377 ; RV64ZBB-LABEL: abd_subnsw_i64_undef:
2379 ; RV64ZBB-NEXT: sub a0, a0, a1
2380 ; RV64ZBB-NEXT: neg a1, a0
2381 ; RV64ZBB-NEXT: min a0, a0, a1
2383 %sub = sub nsw i64 %a, %b
2384 %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true)
2385 %nabs = sub i64 0, %abs
; Negated absolute difference of two i128 values (llvm.abs with i1 false).
; On RV32 the i128 operands are passed indirectly: a1/a2 hold pointers to the
; source values (loaded with lw) and the result is stored through the sret
; pointer in a0 (sw at the end). The multi-word sub/negate carries borrows
; with sltu, using branches (beq/beqz) for the middle-limb borrow compare.
; On RV64 the i128 lives in register pairs (a0/a1 and a2/a3).
2389 define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind {
2390 ; RV32I-LABEL: abd_subnsw_i128:
2392 ; RV32I-NEXT: lw a3, 0(a2)
2393 ; RV32I-NEXT: lw a4, 4(a2)
2394 ; RV32I-NEXT: lw a5, 8(a2)
2395 ; RV32I-NEXT: lw a6, 12(a2)
2396 ; RV32I-NEXT: lw t0, 8(a1)
2397 ; RV32I-NEXT: lw t1, 12(a1)
2398 ; RV32I-NEXT: lw a2, 0(a1)
2399 ; RV32I-NEXT: lw a7, 4(a1)
2400 ; RV32I-NEXT: sltu a1, t0, a5
2401 ; RV32I-NEXT: sub t1, t1, a6
2402 ; RV32I-NEXT: sltu a6, a2, a3
2403 ; RV32I-NEXT: sub a1, t1, a1
2404 ; RV32I-NEXT: mv t1, a6
2405 ; RV32I-NEXT: beq a7, a4, .LBB31_2
2406 ; RV32I-NEXT: # %bb.1:
2407 ; RV32I-NEXT: sltu t1, a7, a4
2408 ; RV32I-NEXT: .LBB31_2:
2409 ; RV32I-NEXT: sub a5, t0, a5
2410 ; RV32I-NEXT: sub a4, a7, a4
2411 ; RV32I-NEXT: sub a3, a2, a3
2412 ; RV32I-NEXT: sltu a2, a5, t1
2413 ; RV32I-NEXT: sub t0, a4, a6
2414 ; RV32I-NEXT: sub a4, a5, t1
2415 ; RV32I-NEXT: sub a5, a1, a2
2416 ; RV32I-NEXT: srai a1, a5, 31
2417 ; RV32I-NEXT: xor a2, a4, a1
2418 ; RV32I-NEXT: xor a5, a5, a1
2419 ; RV32I-NEXT: xor a4, a3, a1
2420 ; RV32I-NEXT: sltu a3, a1, a2
2421 ; RV32I-NEXT: sub a6, a1, a5
2422 ; RV32I-NEXT: sltu a5, a1, a4
2423 ; RV32I-NEXT: sub a3, a6, a3
2424 ; RV32I-NEXT: xor a7, t0, a1
2425 ; RV32I-NEXT: mv a6, a5
2426 ; RV32I-NEXT: beqz t0, .LBB31_4
2427 ; RV32I-NEXT: # %bb.3:
2428 ; RV32I-NEXT: sltu a6, a1, a7
2429 ; RV32I-NEXT: .LBB31_4:
2430 ; RV32I-NEXT: sub a2, a1, a2
2431 ; RV32I-NEXT: sub a7, a1, a7
2432 ; RV32I-NEXT: sub a1, a1, a4
2433 ; RV32I-NEXT: sltu a4, a2, a6
2434 ; RV32I-NEXT: sub a2, a2, a6
2435 ; RV32I-NEXT: sub a5, a7, a5
2436 ; RV32I-NEXT: sub a3, a3, a4
2437 ; RV32I-NEXT: sw a1, 0(a0)
2438 ; RV32I-NEXT: sw a5, 4(a0)
2439 ; RV32I-NEXT: sw a2, 8(a0)
2440 ; RV32I-NEXT: sw a3, 12(a0)
2443 ; RV64I-LABEL: abd_subnsw_i128:
2445 ; RV64I-NEXT: sltu a4, a0, a2
2446 ; RV64I-NEXT: sub a1, a1, a3
2447 ; RV64I-NEXT: sub a0, a0, a2
2448 ; RV64I-NEXT: sub a1, a1, a4
2449 ; RV64I-NEXT: srai a2, a1, 63
2450 ; RV64I-NEXT: xor a0, a0, a2
2451 ; RV64I-NEXT: xor a1, a1, a2
2452 ; RV64I-NEXT: sltu a3, a2, a0
2453 ; RV64I-NEXT: sub a1, a2, a1
2454 ; RV64I-NEXT: sub a1, a1, a3
2455 ; RV64I-NEXT: sub a0, a2, a0
2458 ; RV32ZBB-LABEL: abd_subnsw_i128:
2460 ; RV32ZBB-NEXT: lw a3, 0(a2)
2461 ; RV32ZBB-NEXT: lw a4, 4(a2)
2462 ; RV32ZBB-NEXT: lw a5, 8(a2)
2463 ; RV32ZBB-NEXT: lw a6, 12(a2)
2464 ; RV32ZBB-NEXT: lw t0, 8(a1)
2465 ; RV32ZBB-NEXT: lw t1, 12(a1)
2466 ; RV32ZBB-NEXT: lw a2, 0(a1)
2467 ; RV32ZBB-NEXT: lw a7, 4(a1)
2468 ; RV32ZBB-NEXT: sltu a1, t0, a5
2469 ; RV32ZBB-NEXT: sub t1, t1, a6
2470 ; RV32ZBB-NEXT: sltu a6, a2, a3
2471 ; RV32ZBB-NEXT: sub a1, t1, a1
2472 ; RV32ZBB-NEXT: mv t1, a6
2473 ; RV32ZBB-NEXT: beq a7, a4, .LBB31_2
2474 ; RV32ZBB-NEXT: # %bb.1:
2475 ; RV32ZBB-NEXT: sltu t1, a7, a4
2476 ; RV32ZBB-NEXT: .LBB31_2:
2477 ; RV32ZBB-NEXT: sub a5, t0, a5
2478 ; RV32ZBB-NEXT: sub a4, a7, a4
2479 ; RV32ZBB-NEXT: sub a3, a2, a3
2480 ; RV32ZBB-NEXT: sltu a2, a5, t1
2481 ; RV32ZBB-NEXT: sub t0, a4, a6
2482 ; RV32ZBB-NEXT: sub a4, a5, t1
2483 ; RV32ZBB-NEXT: sub a5, a1, a2
2484 ; RV32ZBB-NEXT: srai a1, a5, 31
2485 ; RV32ZBB-NEXT: xor a2, a4, a1
2486 ; RV32ZBB-NEXT: xor a5, a5, a1
2487 ; RV32ZBB-NEXT: xor a4, a3, a1
2488 ; RV32ZBB-NEXT: sltu a3, a1, a2
2489 ; RV32ZBB-NEXT: sub a6, a1, a5
2490 ; RV32ZBB-NEXT: sltu a5, a1, a4
2491 ; RV32ZBB-NEXT: sub a3, a6, a3
2492 ; RV32ZBB-NEXT: xor a7, t0, a1
2493 ; RV32ZBB-NEXT: mv a6, a5
2494 ; RV32ZBB-NEXT: beqz t0, .LBB31_4
2495 ; RV32ZBB-NEXT: # %bb.3:
2496 ; RV32ZBB-NEXT: sltu a6, a1, a7
2497 ; RV32ZBB-NEXT: .LBB31_4:
2498 ; RV32ZBB-NEXT: sub a2, a1, a2
2499 ; RV32ZBB-NEXT: sub a7, a1, a7
2500 ; RV32ZBB-NEXT: sub a1, a1, a4
2501 ; RV32ZBB-NEXT: sltu a4, a2, a6
2502 ; RV32ZBB-NEXT: sub a2, a2, a6
2503 ; RV32ZBB-NEXT: sub a5, a7, a5
2504 ; RV32ZBB-NEXT: sub a3, a3, a4
2505 ; RV32ZBB-NEXT: sw a1, 0(a0)
2506 ; RV32ZBB-NEXT: sw a5, 4(a0)
2507 ; RV32ZBB-NEXT: sw a2, 8(a0)
2508 ; RV32ZBB-NEXT: sw a3, 12(a0)
2511 ; RV64ZBB-LABEL: abd_subnsw_i128:
2513 ; RV64ZBB-NEXT: sltu a4, a0, a2
2514 ; RV64ZBB-NEXT: sub a1, a1, a3
2515 ; RV64ZBB-NEXT: sub a0, a0, a2
2516 ; RV64ZBB-NEXT: sub a1, a1, a4
2517 ; RV64ZBB-NEXT: srai a2, a1, 63
2518 ; RV64ZBB-NEXT: xor a0, a0, a2
2519 ; RV64ZBB-NEXT: xor a1, a1, a2
2520 ; RV64ZBB-NEXT: sltu a3, a2, a0
2521 ; RV64ZBB-NEXT: sub a1, a2, a1
2522 ; RV64ZBB-NEXT: sub a1, a1, a3
2523 ; RV64ZBB-NEXT: sub a0, a2, a0
2525 %sub = sub nsw i128 %a, %b
2526 %abs = call i128 @llvm.abs.i128(i128 %sub, i1 false)
2527 %nabs = sub i128 0, %abs
; Same as abd_subnsw_i128 but with the llvm.abs poison flag set (i1 true):
; abs of the most negative i128 is poison. Expected codegen is identical to
; the non-undef variant apart from the basic-block label numbers (.LBB32_*).
2531 define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind {
2532 ; RV32I-LABEL: abd_subnsw_i128_undef:
2534 ; RV32I-NEXT: lw a3, 0(a2)
2535 ; RV32I-NEXT: lw a4, 4(a2)
2536 ; RV32I-NEXT: lw a5, 8(a2)
2537 ; RV32I-NEXT: lw a6, 12(a2)
2538 ; RV32I-NEXT: lw t0, 8(a1)
2539 ; RV32I-NEXT: lw t1, 12(a1)
2540 ; RV32I-NEXT: lw a2, 0(a1)
2541 ; RV32I-NEXT: lw a7, 4(a1)
2542 ; RV32I-NEXT: sltu a1, t0, a5
2543 ; RV32I-NEXT: sub t1, t1, a6
2544 ; RV32I-NEXT: sltu a6, a2, a3
2545 ; RV32I-NEXT: sub a1, t1, a1
2546 ; RV32I-NEXT: mv t1, a6
2547 ; RV32I-NEXT: beq a7, a4, .LBB32_2
2548 ; RV32I-NEXT: # %bb.1:
2549 ; RV32I-NEXT: sltu t1, a7, a4
2550 ; RV32I-NEXT: .LBB32_2:
2551 ; RV32I-NEXT: sub a5, t0, a5
2552 ; RV32I-NEXT: sub a4, a7, a4
2553 ; RV32I-NEXT: sub a3, a2, a3
2554 ; RV32I-NEXT: sltu a2, a5, t1
2555 ; RV32I-NEXT: sub t0, a4, a6
2556 ; RV32I-NEXT: sub a4, a5, t1
2557 ; RV32I-NEXT: sub a5, a1, a2
2558 ; RV32I-NEXT: srai a1, a5, 31
2559 ; RV32I-NEXT: xor a2, a4, a1
2560 ; RV32I-NEXT: xor a5, a5, a1
2561 ; RV32I-NEXT: xor a4, a3, a1
2562 ; RV32I-NEXT: sltu a3, a1, a2
2563 ; RV32I-NEXT: sub a6, a1, a5
2564 ; RV32I-NEXT: sltu a5, a1, a4
2565 ; RV32I-NEXT: sub a3, a6, a3
2566 ; RV32I-NEXT: xor a7, t0, a1
2567 ; RV32I-NEXT: mv a6, a5
2568 ; RV32I-NEXT: beqz t0, .LBB32_4
2569 ; RV32I-NEXT: # %bb.3:
2570 ; RV32I-NEXT: sltu a6, a1, a7
2571 ; RV32I-NEXT: .LBB32_4:
2572 ; RV32I-NEXT: sub a2, a1, a2
2573 ; RV32I-NEXT: sub a7, a1, a7
2574 ; RV32I-NEXT: sub a1, a1, a4
2575 ; RV32I-NEXT: sltu a4, a2, a6
2576 ; RV32I-NEXT: sub a2, a2, a6
2577 ; RV32I-NEXT: sub a5, a7, a5
2578 ; RV32I-NEXT: sub a3, a3, a4
2579 ; RV32I-NEXT: sw a1, 0(a0)
2580 ; RV32I-NEXT: sw a5, 4(a0)
2581 ; RV32I-NEXT: sw a2, 8(a0)
2582 ; RV32I-NEXT: sw a3, 12(a0)
2585 ; RV64I-LABEL: abd_subnsw_i128_undef:
2587 ; RV64I-NEXT: sltu a4, a0, a2
2588 ; RV64I-NEXT: sub a1, a1, a3
2589 ; RV64I-NEXT: sub a0, a0, a2
2590 ; RV64I-NEXT: sub a1, a1, a4
2591 ; RV64I-NEXT: srai a2, a1, 63
2592 ; RV64I-NEXT: xor a0, a0, a2
2593 ; RV64I-NEXT: xor a1, a1, a2
2594 ; RV64I-NEXT: sltu a3, a2, a0
2595 ; RV64I-NEXT: sub a1, a2, a1
2596 ; RV64I-NEXT: sub a1, a1, a3
2597 ; RV64I-NEXT: sub a0, a2, a0
2600 ; RV32ZBB-LABEL: abd_subnsw_i128_undef:
2602 ; RV32ZBB-NEXT: lw a3, 0(a2)
2603 ; RV32ZBB-NEXT: lw a4, 4(a2)
2604 ; RV32ZBB-NEXT: lw a5, 8(a2)
2605 ; RV32ZBB-NEXT: lw a6, 12(a2)
2606 ; RV32ZBB-NEXT: lw t0, 8(a1)
2607 ; RV32ZBB-NEXT: lw t1, 12(a1)
2608 ; RV32ZBB-NEXT: lw a2, 0(a1)
2609 ; RV32ZBB-NEXT: lw a7, 4(a1)
2610 ; RV32ZBB-NEXT: sltu a1, t0, a5
2611 ; RV32ZBB-NEXT: sub t1, t1, a6
2612 ; RV32ZBB-NEXT: sltu a6, a2, a3
2613 ; RV32ZBB-NEXT: sub a1, t1, a1
2614 ; RV32ZBB-NEXT: mv t1, a6
2615 ; RV32ZBB-NEXT: beq a7, a4, .LBB32_2
2616 ; RV32ZBB-NEXT: # %bb.1:
2617 ; RV32ZBB-NEXT: sltu t1, a7, a4
2618 ; RV32ZBB-NEXT: .LBB32_2:
2619 ; RV32ZBB-NEXT: sub a5, t0, a5
2620 ; RV32ZBB-NEXT: sub a4, a7, a4
2621 ; RV32ZBB-NEXT: sub a3, a2, a3
2622 ; RV32ZBB-NEXT: sltu a2, a5, t1
2623 ; RV32ZBB-NEXT: sub t0, a4, a6
2624 ; RV32ZBB-NEXT: sub a4, a5, t1
2625 ; RV32ZBB-NEXT: sub a5, a1, a2
2626 ; RV32ZBB-NEXT: srai a1, a5, 31
2627 ; RV32ZBB-NEXT: xor a2, a4, a1
2628 ; RV32ZBB-NEXT: xor a5, a5, a1
2629 ; RV32ZBB-NEXT: xor a4, a3, a1
2630 ; RV32ZBB-NEXT: sltu a3, a1, a2
2631 ; RV32ZBB-NEXT: sub a6, a1, a5
2632 ; RV32ZBB-NEXT: sltu a5, a1, a4
2633 ; RV32ZBB-NEXT: sub a3, a6, a3
2634 ; RV32ZBB-NEXT: xor a7, t0, a1
2635 ; RV32ZBB-NEXT: mv a6, a5
2636 ; RV32ZBB-NEXT: beqz t0, .LBB32_4
2637 ; RV32ZBB-NEXT: # %bb.3:
2638 ; RV32ZBB-NEXT: sltu a6, a1, a7
2639 ; RV32ZBB-NEXT: .LBB32_4:
2640 ; RV32ZBB-NEXT: sub a2, a1, a2
2641 ; RV32ZBB-NEXT: sub a7, a1, a7
2642 ; RV32ZBB-NEXT: sub a1, a1, a4
2643 ; RV32ZBB-NEXT: sltu a4, a2, a6
2644 ; RV32ZBB-NEXT: sub a2, a2, a6
2645 ; RV32ZBB-NEXT: sub a5, a7, a5
2646 ; RV32ZBB-NEXT: sub a3, a3, a4
2647 ; RV32ZBB-NEXT: sw a1, 0(a0)
2648 ; RV32ZBB-NEXT: sw a5, 4(a0)
2649 ; RV32ZBB-NEXT: sw a2, 8(a0)
2650 ; RV32ZBB-NEXT: sw a3, 12(a0)
2653 ; RV64ZBB-LABEL: abd_subnsw_i128_undef:
2655 ; RV64ZBB-NEXT: sltu a4, a0, a2
2656 ; RV64ZBB-NEXT: sub a1, a1, a3
2657 ; RV64ZBB-NEXT: sub a0, a0, a2
2658 ; RV64ZBB-NEXT: sub a1, a1, a4
2659 ; RV64ZBB-NEXT: srai a2, a1, 63
2660 ; RV64ZBB-NEXT: xor a0, a0, a2
2661 ; RV64ZBB-NEXT: xor a1, a1, a2
2662 ; RV64ZBB-NEXT: sltu a3, a2, a0
2663 ; RV64ZBB-NEXT: sub a1, a2, a1
2664 ; RV64ZBB-NEXT: sub a1, a1, a3
2665 ; RV64ZBB-NEXT: sub a0, a2, a0
2667 %sub = sub nsw i128 %a, %b
2668 %abs = call i128 @llvm.abs.i128(i128 %sub, i1 true)
2669 %nabs = sub i128 0, %abs
2673 declare i8 @llvm.abs.i8(i8, i1)
2674 declare i16 @llvm.abs.i16(i16, i1)
2675 declare i32 @llvm.abs.i32(i32, i1)
2676 declare i64 @llvm.abs.i64(i64, i1)
2677 declare i128 @llvm.abs.i128(i128, i1)
2679 declare i8 @llvm.smax.i8(i8, i8)
2680 declare i16 @llvm.smax.i16(i16, i16)
2681 declare i32 @llvm.smax.i32(i32, i32)
2682 declare i64 @llvm.smax.i64(i64, i64)
2684 declare i8 @llvm.smin.i8(i8, i8)
2685 declare i16 @llvm.smin.i16(i16, i16)
2686 declare i32 @llvm.smin.i32(i32, i32)
2687 declare i64 @llvm.smin.i64(i64, i64)