1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=riscv32 -mattr=+m | FileCheck %s --check-prefix=RV32I
3 ; RUN: llc < %s -mtriple=riscv64 -mattr=+m | FileCheck %s --check-prefix=RV64I
4 ; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+zbb | FileCheck %s --check-prefix=RV32IZbb
5 ; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zbb | FileCheck %s --check-prefix=RV64IZbb
; Declarations of the unsigned saturating-subtract intrinsic at every integer
; width that the test functions below call (i4, i8, i16, i32 and i64).
7 declare i4 @llvm.usub.sat.i4(i4, i4)
8 declare i8 @llvm.usub.sat.i8(i8, i8)
9 declare i16 @llvm.usub.sat.i16(i16, i16)
10 declare i32 @llvm.usub.sat.i32(i32, i32)
11 declare i64 @llvm.usub.sat.i64(i64, i64)
; func32 calls the i32 unsigned saturating subtract on %x and %a.
; The definition of %a is not among the lines visible here; the expected
; assembly multiplies a1 by a2 before the subtraction, so presumably
; %a = %y * %z (confirm against the full file).
; Without Zbb the expected code is a sub followed by a bltu that leaves 0 in
; a0 when the difference compares above the minuend (i.e. the sub wrapped).
; With Zbb the expected code is maxu followed by sub, with no branch.
13 define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
14 ; RV32I-LABEL: func32:
16 ; RV32I-NEXT: mv a3, a0
17 ; RV32I-NEXT: mul a0, a1, a2
18 ; RV32I-NEXT: sub a1, a3, a0
19 ; RV32I-NEXT: li a0, 0
20 ; RV32I-NEXT: bltu a3, a1, .LBB0_2
21 ; RV32I-NEXT: # %bb.1:
22 ; RV32I-NEXT: mv a0, a1
23 ; RV32I-NEXT: .LBB0_2:
26 ; RV64I-LABEL: func32:
28 ; RV64I-NEXT: mulw a1, a1, a2
29 ; RV64I-NEXT: subw a1, a0, a1
30 ; RV64I-NEXT: sext.w a2, a0
31 ; RV64I-NEXT: li a0, 0
32 ; RV64I-NEXT: bltu a2, a1, .LBB0_2
33 ; RV64I-NEXT: # %bb.1:
34 ; RV64I-NEXT: mv a0, a1
35 ; RV64I-NEXT: .LBB0_2:
38 ; RV32IZbb-LABEL: func32:
40 ; RV32IZbb-NEXT: mul a1, a1, a2
41 ; RV32IZbb-NEXT: maxu a0, a0, a1
42 ; RV32IZbb-NEXT: sub a0, a0, a1
45 ; RV64IZbb-LABEL: func32:
47 ; RV64IZbb-NEXT: mulw a1, a1, a2
48 ; RV64IZbb-NEXT: sext.w a0, a0
49 ; RV64IZbb-NEXT: maxu a0, a0, a1
50 ; RV64IZbb-NEXT: sub a0, a0, a1
53 %tmp = call i32 @llvm.usub.sat.i32(i32 %x, i32 %a)
; func64 calls the i64 unsigned saturating subtract on %x and %z.
; NOTE(review): unlike the other functions in this file, the call passes %z
; directly rather than %a, and none of the expected sequences below contain a
; multiply. If a product operand was intended, the IR must be changed and the
; assertions regenerated with the update script; do not edit them by hand.
; On the 32-bit targets the expected code subtracts in two register halves
; with a borrow (sltu/sub/sub) and then selects 0 through branches; the
; expected sequence is the same with and without Zbb.
; On the 64-bit target, without Zbb the expected code is sub plus a bltu that
; selects 0; with Zbb it is maxu followed by sub.
57 define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
58 ; RV32I-LABEL: func64:
60 ; RV32I-NEXT: sltu a2, a0, a4
61 ; RV32I-NEXT: sub a3, a1, a5
62 ; RV32I-NEXT: sub a2, a3, a2
63 ; RV32I-NEXT: sub a3, a0, a4
64 ; RV32I-NEXT: beq a2, a1, .LBB1_2
65 ; RV32I-NEXT: # %bb.1:
66 ; RV32I-NEXT: sltu a4, a1, a2
67 ; RV32I-NEXT: j .LBB1_3
68 ; RV32I-NEXT: .LBB1_2:
69 ; RV32I-NEXT: sltu a4, a0, a3
70 ; RV32I-NEXT: .LBB1_3:
71 ; RV32I-NEXT: li a0, 0
72 ; RV32I-NEXT: li a1, 0
73 ; RV32I-NEXT: bnez a4, .LBB1_5
74 ; RV32I-NEXT: # %bb.4:
75 ; RV32I-NEXT: mv a0, a3
76 ; RV32I-NEXT: mv a1, a2
77 ; RV32I-NEXT: .LBB1_5:
80 ; RV64I-LABEL: func64:
82 ; RV64I-NEXT: mv a1, a0
83 ; RV64I-NEXT: sub a2, a0, a2
84 ; RV64I-NEXT: li a0, 0
85 ; RV64I-NEXT: bltu a1, a2, .LBB1_2
86 ; RV64I-NEXT: # %bb.1:
87 ; RV64I-NEXT: mv a0, a2
88 ; RV64I-NEXT: .LBB1_2:
91 ; RV32IZbb-LABEL: func64:
93 ; RV32IZbb-NEXT: sltu a2, a0, a4
94 ; RV32IZbb-NEXT: sub a3, a1, a5
95 ; RV32IZbb-NEXT: sub a2, a3, a2
96 ; RV32IZbb-NEXT: sub a3, a0, a4
97 ; RV32IZbb-NEXT: beq a2, a1, .LBB1_2
98 ; RV32IZbb-NEXT: # %bb.1:
99 ; RV32IZbb-NEXT: sltu a4, a1, a2
100 ; RV32IZbb-NEXT: j .LBB1_3
101 ; RV32IZbb-NEXT: .LBB1_2:
102 ; RV32IZbb-NEXT: sltu a4, a0, a3
103 ; RV32IZbb-NEXT: .LBB1_3:
104 ; RV32IZbb-NEXT: li a0, 0
105 ; RV32IZbb-NEXT: li a1, 0
106 ; RV32IZbb-NEXT: bnez a4, .LBB1_5
107 ; RV32IZbb-NEXT: # %bb.4:
108 ; RV32IZbb-NEXT: mv a0, a3
109 ; RV32IZbb-NEXT: mv a1, a2
110 ; RV32IZbb-NEXT: .LBB1_5:
113 ; RV64IZbb-LABEL: func64:
115 ; RV64IZbb-NEXT: maxu a0, a0, a2
116 ; RV64IZbb-NEXT: sub a0, a0, a2
119 %tmp = call i64 @llvm.usub.sat.i64(i64 %x, i64 %z)
; func16 calls the i16 unsigned saturating subtract on %x and %a.
; The definition of %a is not among the lines visible here; the expected
; assembly multiplies a1 by a2 first, so presumably %a = %y * %z (confirm
; against the full file).
; Both operands are reduced to 16 bits before the subtraction: without Zbb
; through an and with the constant built by lui 16 / addi -1 (65535), with
; Zbb through zext.h. The saturation itself is sub + bltu selecting 0
; without Zbb, and maxu + sub with Zbb.
123 define i16 @func16(i16 %x, i16 %y, i16 %z) nounwind {
124 ; RV32I-LABEL: func16:
126 ; RV32I-NEXT: lui a3, 16
127 ; RV32I-NEXT: addi a3, a3, -1
128 ; RV32I-NEXT: and a4, a0, a3
129 ; RV32I-NEXT: mul a0, a1, a2
130 ; RV32I-NEXT: and a0, a0, a3
131 ; RV32I-NEXT: sub a1, a4, a0
132 ; RV32I-NEXT: li a0, 0
133 ; RV32I-NEXT: bltu a4, a1, .LBB2_2
134 ; RV32I-NEXT: # %bb.1:
135 ; RV32I-NEXT: mv a0, a1
136 ; RV32I-NEXT: .LBB2_2:
139 ; RV64I-LABEL: func16:
141 ; RV64I-NEXT: lui a3, 16
142 ; RV64I-NEXT: addiw a3, a3, -1
143 ; RV64I-NEXT: and a4, a0, a3
144 ; RV64I-NEXT: mul a0, a1, a2
145 ; RV64I-NEXT: and a0, a0, a3
146 ; RV64I-NEXT: sub a1, a4, a0
147 ; RV64I-NEXT: li a0, 0
148 ; RV64I-NEXT: bltu a4, a1, .LBB2_2
149 ; RV64I-NEXT: # %bb.1:
150 ; RV64I-NEXT: mv a0, a1
151 ; RV64I-NEXT: .LBB2_2:
154 ; RV32IZbb-LABEL: func16:
156 ; RV32IZbb-NEXT: zext.h a0, a0
157 ; RV32IZbb-NEXT: mul a1, a1, a2
158 ; RV32IZbb-NEXT: zext.h a1, a1
159 ; RV32IZbb-NEXT: maxu a0, a0, a1
160 ; RV32IZbb-NEXT: sub a0, a0, a1
163 ; RV64IZbb-LABEL: func16:
165 ; RV64IZbb-NEXT: zext.h a0, a0
166 ; RV64IZbb-NEXT: mulw a1, a1, a2
167 ; RV64IZbb-NEXT: zext.h a1, a1
168 ; RV64IZbb-NEXT: maxu a0, a0, a1
169 ; RV64IZbb-NEXT: sub a0, a0, a1
172 %tmp = call i16 @llvm.usub.sat.i16(i16 %x, i16 %a)
; func8 calls the i8 unsigned saturating subtract on %x and %a.
; The definition of %a is not among the lines visible here; the expected
; assembly multiplies a1 by a2 first, so presumably %a = %y * %z (confirm
; against the full file).
; Both operands are masked with andi 255 before the subtraction in every
; configuration. The saturation is sub + bltu selecting 0 without Zbb, and
; maxu + sub with Zbb.
176 define i8 @func8(i8 %x, i8 %y, i8 %z) nounwind {
177 ; RV32I-LABEL: func8:
179 ; RV32I-NEXT: andi a3, a0, 255
180 ; RV32I-NEXT: mul a0, a1, a2
181 ; RV32I-NEXT: andi a0, a0, 255
182 ; RV32I-NEXT: sub a1, a3, a0
183 ; RV32I-NEXT: li a0, 0
184 ; RV32I-NEXT: bltu a3, a1, .LBB3_2
185 ; RV32I-NEXT: # %bb.1:
186 ; RV32I-NEXT: mv a0, a1
187 ; RV32I-NEXT: .LBB3_2:
190 ; RV64I-LABEL: func8:
192 ; RV64I-NEXT: andi a3, a0, 255
193 ; RV64I-NEXT: mulw a0, a1, a2
194 ; RV64I-NEXT: andi a0, a0, 255
195 ; RV64I-NEXT: sub a1, a3, a0
196 ; RV64I-NEXT: li a0, 0
197 ; RV64I-NEXT: bltu a3, a1, .LBB3_2
198 ; RV64I-NEXT: # %bb.1:
199 ; RV64I-NEXT: mv a0, a1
200 ; RV64I-NEXT: .LBB3_2:
203 ; RV32IZbb-LABEL: func8:
205 ; RV32IZbb-NEXT: andi a0, a0, 255
206 ; RV32IZbb-NEXT: mul a1, a1, a2
207 ; RV32IZbb-NEXT: andi a1, a1, 255
208 ; RV32IZbb-NEXT: maxu a0, a0, a1
209 ; RV32IZbb-NEXT: sub a0, a0, a1
212 ; RV64IZbb-LABEL: func8:
214 ; RV64IZbb-NEXT: andi a0, a0, 255
215 ; RV64IZbb-NEXT: mulw a1, a1, a2
216 ; RV64IZbb-NEXT: andi a1, a1, 255
217 ; RV64IZbb-NEXT: maxu a0, a0, a1
218 ; RV64IZbb-NEXT: sub a0, a0, a1
221 %tmp = call i8 @llvm.usub.sat.i8(i8 %x, i8 %a)
; func4 calls the i4 unsigned saturating subtract on %x and %a.
; The definition of %a is not among the lines visible here; the expected
; assembly multiplies a1 by a2 first, so presumably %a = %y * %z (confirm
; against the full file).
; Both operands are masked with andi 15 before the subtraction in every
; configuration. The saturation is sub + bltu selecting 0 without Zbb, and
; maxu + sub with Zbb.
225 define i4 @func4(i4 %x, i4 %y, i4 %z) nounwind {
226 ; RV32I-LABEL: func4:
228 ; RV32I-NEXT: andi a3, a0, 15
229 ; RV32I-NEXT: mul a0, a1, a2
230 ; RV32I-NEXT: andi a0, a0, 15
231 ; RV32I-NEXT: sub a1, a3, a0
232 ; RV32I-NEXT: li a0, 0
233 ; RV32I-NEXT: bltu a3, a1, .LBB4_2
234 ; RV32I-NEXT: # %bb.1:
235 ; RV32I-NEXT: mv a0, a1
236 ; RV32I-NEXT: .LBB4_2:
239 ; RV64I-LABEL: func4:
241 ; RV64I-NEXT: andi a3, a0, 15
242 ; RV64I-NEXT: mulw a0, a1, a2
243 ; RV64I-NEXT: andi a0, a0, 15
244 ; RV64I-NEXT: sub a1, a3, a0
245 ; RV64I-NEXT: li a0, 0
246 ; RV64I-NEXT: bltu a3, a1, .LBB4_2
247 ; RV64I-NEXT: # %bb.1:
248 ; RV64I-NEXT: mv a0, a1
249 ; RV64I-NEXT: .LBB4_2:
252 ; RV32IZbb-LABEL: func4:
254 ; RV32IZbb-NEXT: andi a0, a0, 15
255 ; RV32IZbb-NEXT: mul a1, a1, a2
256 ; RV32IZbb-NEXT: andi a1, a1, 15
257 ; RV32IZbb-NEXT: maxu a0, a0, a1
258 ; RV32IZbb-NEXT: sub a0, a0, a1
261 ; RV64IZbb-LABEL: func4:
263 ; RV64IZbb-NEXT: andi a0, a0, 15
264 ; RV64IZbb-NEXT: mulw a1, a1, a2
265 ; RV64IZbb-NEXT: andi a1, a1, 15
266 ; RV64IZbb-NEXT: maxu a0, a0, a1
267 ; RV64IZbb-NEXT: sub a0, a0, a1
270 %tmp = call i4 @llvm.usub.sat.i4(i4 %x, i4 %a)