1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck --check-prefix=GFX6 %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck --check-prefix=GFX8 %s
4 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck --check-prefix=GFX9 %s
5 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX10PLUS,GFX10 %s
6 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX10PLUS,GFX11 %s
; Signed saturating subtraction of two i8 values through @llvm.ssub.sat.i8.
; What the expected code for each check prefix does:
;   * GFX6 checks: sign-extend both operands with v_bfe_i32 (width 8),
;     subtract, then clamp with v_med3_i32 using the 0xff80 and 0x7f constants.
;   * GFX8 checks: one SDWA v_sub_u16 on sign-extended low bytes, followed by
;     v_min_i16 / v_max_i16 against 0x7f and 0xff80.
;   * GFX9 and GFX10PLUS checks: shift both operands left by 8, subtract with
;     the clamp modifier, then shift the result right arithmetically by 8.
8 define i8 @v_ssubsat_i8(i8 %lhs, i8 %rhs) {
9 ; GFX6-LABEL: v_ssubsat_i8:
11 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 8
13 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8
14 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
15 ; GFX6-NEXT: s_movk_i32 s4, 0xff80
16 ; GFX6-NEXT: v_mov_b32_e32 v1, 0x7f
17 ; GFX6-NEXT: v_med3_i32 v0, v0, s4, v1
18 ; GFX6-NEXT: s_setpc_b64 s[30:31]
20 ; GFX8-LABEL: v_ssubsat_i8:
22 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23 ; GFX8-NEXT: v_sub_u16_sdwa v0, sext(v0), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
24 ; GFX8-NEXT: v_min_i16_e32 v0, 0x7f, v0
25 ; GFX8-NEXT: v_max_i16_e32 v0, 0xff80, v0
26 ; GFX8-NEXT: s_setpc_b64 s[30:31]
28 ; GFX9-LABEL: v_ssubsat_i8:
30 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
31 ; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1
32 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 8, v0
33 ; GFX9-NEXT: v_sub_i16 v0, v0, v1 clamp
34 ; GFX9-NEXT: v_ashrrev_i16_e32 v0, 8, v0
35 ; GFX9-NEXT: s_setpc_b64 s[30:31]
37 ; GFX10PLUS-LABEL: v_ssubsat_i8:
39 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
40 ; GFX10PLUS-NEXT: v_lshlrev_b16 v1, 8, v1
41 ; GFX10PLUS-NEXT: v_lshlrev_b16 v0, 8, v0
42 ; GFX10PLUS-NEXT: v_sub_nc_i16 v0, v0, v1 clamp
43 ; GFX10PLUS-NEXT: v_ashrrev_i16 v0, 8, v0
44 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
45 %result = call i8 @llvm.ssub.sat.i8(i8 %lhs, i8 %rhs)
; Signed saturating subtraction of two i16 values through @llvm.ssub.sat.i16.
; What the expected code for each check prefix does:
;   * GFX6 checks: sign-extend both operands with v_bfe_i32 (width 16),
;     subtract, then clamp with v_med3_i32 using the 0x8000 and 0x7fff constants.
;   * GFX8 checks: an expanded sequence - two signed compares around a
;     v_sub_u16, a saturation value built from v_ashrrev_i16 by 15 xor
;     0xffff8000, and a v_cndmask select driven by the xor of the compares.
;   * GFX9 and GFX10PLUS checks: a single 16-bit subtract with the clamp
;     modifier (v_sub_i16 and v_sub_nc_i16 respectively).
49 define i16 @v_ssubsat_i16(i16 %lhs, i16 %rhs) {
50 ; GFX6-LABEL: v_ssubsat_i16:
52 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
53 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
54 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
55 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
56 ; GFX6-NEXT: s_movk_i32 s4, 0x8000
57 ; GFX6-NEXT: v_mov_b32_e32 v1, 0x7fff
58 ; GFX6-NEXT: v_med3_i32 v0, v0, s4, v1
59 ; GFX6-NEXT: s_setpc_b64 s[30:31]
61 ; GFX8-LABEL: v_ssubsat_i16:
63 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
64 ; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v1
65 ; GFX8-NEXT: v_sub_u16_e32 v1, v0, v1
66 ; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v1, v0
67 ; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v1
68 ; GFX8-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0
69 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
70 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
71 ; GFX8-NEXT: s_setpc_b64 s[30:31]
73 ; GFX9-LABEL: v_ssubsat_i16:
75 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
76 ; GFX9-NEXT: v_sub_i16 v0, v0, v1 clamp
77 ; GFX9-NEXT: s_setpc_b64 s[30:31]
79 ; GFX10PLUS-LABEL: v_ssubsat_i16:
81 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
82 ; GFX10PLUS-NEXT: v_sub_nc_i16 v0, v0, v1 clamp
83 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
84 %result = call i16 @llvm.ssub.sat.i16(i16 %lhs, i16 %rhs)
; Signed saturating subtraction of two i32 values through @llvm.ssub.sat.i32.
; What the expected code for each check prefix does:
;   * GFX6 and GFX8 checks: the same expanded sequence, differing only in the
;     subtract mnemonic (v_sub_i32 vs v_sub_u32) - two signed compares, a
;     saturation value built from v_ashrrev_i32 by 31 xor 0x80000000, and a
;     v_cndmask select driven by the xor of the two compare results.
;   * GFX9 checks: a single v_sub_i32 with the clamp modifier.
;   * GFX10PLUS checks: a single v_sub_nc_i32 with the clamp modifier.
88 define i32 @v_ssubsat_i32(i32 %lhs, i32 %rhs) {
89 ; GFX6-LABEL: v_ssubsat_i32:
91 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
92 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v1
93 ; GFX6-NEXT: v_sub_i32_e64 v1, s[4:5], v0, v1
94 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v1, v0
95 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v1
96 ; GFX6-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
97 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
98 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
99 ; GFX6-NEXT: s_setpc_b64 s[30:31]
101 ; GFX8-LABEL: v_ssubsat_i32:
103 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
104 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v1
105 ; GFX8-NEXT: v_sub_u32_e64 v1, s[4:5], v0, v1
106 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v1, v0
107 ; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v1
108 ; GFX8-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
109 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
110 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
111 ; GFX8-NEXT: s_setpc_b64 s[30:31]
113 ; GFX9-LABEL: v_ssubsat_i32:
115 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
116 ; GFX9-NEXT: v_sub_i32 v0, v0, v1 clamp
117 ; GFX9-NEXT: s_setpc_b64 s[30:31]
119 ; GFX10PLUS-LABEL: v_ssubsat_i32:
120 ; GFX10PLUS: ; %bb.0:
121 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
122 ; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v1 clamp
123 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
124 %result = call i32 @llvm.ssub.sat.i32(i32 %lhs, i32 %rhs)
; Signed saturating subtraction of two <2 x i16> vectors through
; @llvm.ssub.sat.v2i16; the intrinsic result is returned directly.
; What the expected code for each check prefix does:
;   * GFX6 checks: the operands arrive as four separate registers (v0..v3);
;     each is sign-extended with v_bfe_i32, subtracted, clamped with
;     v_med3_i32 (0x8000 / 0x7fff), and the results are combined with
;     shift / and / or.
;   * GFX8 checks: the high halves are extracted with v_lshrrev_b32 by 16,
;     each half goes through the compare / subtract / select expansion, and
;     the halves are recombined with an SDWA v_or_b32.
;   * GFX9 and GFX10PLUS checks: a single v_pk_sub_i16 with the clamp modifier.
128 define <2 x i16> @v_ssubsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
129 ; GFX6-LABEL: v_ssubsat_v2i16:
131 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
132 ; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16
133 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
134 ; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16
135 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
136 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
137 ; GFX6-NEXT: s_movk_i32 s4, 0x8000
138 ; GFX6-NEXT: v_mov_b32_e32 v3, 0x7fff
139 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
140 ; GFX6-NEXT: v_med3_i32 v1, v1, s4, v3
141 ; GFX6-NEXT: v_med3_i32 v0, v0, s4, v3
142 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v1
143 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
144 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v4
145 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
146 ; GFX6-NEXT: s_setpc_b64 s[30:31]
148 ; GFX8-LABEL: v_ssubsat_v2i16:
150 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
151 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
152 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
153 ; GFX8-NEXT: v_sub_u16_e32 v4, v3, v2
154 ; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v4, v3
155 ; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], 0, v2
156 ; GFX8-NEXT: v_ashrrev_i16_e32 v2, 15, v4
157 ; GFX8-NEXT: v_xor_b32_e32 v2, 0xffff8000, v2
158 ; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
159 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
160 ; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v1
161 ; GFX8-NEXT: v_sub_u16_e32 v1, v0, v1
162 ; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v1, v0
163 ; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v1
164 ; GFX8-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0
165 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
166 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
167 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
168 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
169 ; GFX8-NEXT: s_setpc_b64 s[30:31]
171 ; GFX9-LABEL: v_ssubsat_v2i16:
173 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
174 ; GFX9-NEXT: v_pk_sub_i16 v0, v0, v1 clamp
175 ; GFX9-NEXT: s_setpc_b64 s[30:31]
177 ; GFX10PLUS-LABEL: v_ssubsat_v2i16:
178 ; GFX10PLUS: ; %bb.0:
179 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
180 ; GFX10PLUS-NEXT: v_pk_sub_i16 v0, v0, v1 clamp
181 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
182 %result = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> %lhs, <2 x i16> %rhs)
183 ret <2 x i16> %result
; Signed saturating subtraction of two <3 x i16> vectors through
; @llvm.ssub.sat.v3i16; the intrinsic result is returned directly.
; What the expected code for each check prefix does:
;   * GFX6 checks: six separate operand registers (v0..v5) are sign-extended
;     with v_bfe_i32; each element is subtracted and clamped with v_med3_i32
;     (0x8000 / 0x7fff), then the results are combined with shift / and / or
;     and a final v_alignbit_b32.
;   * GFX8 checks: three copies of the compare / subtract / select expansion
;     (high half of v0/v2, then v1/v3, then low half of v0/v2), with the first
;     pair recombined through an SDWA v_or_b32.
;   * GFX9 and GFX10PLUS checks: two v_pk_sub_i16 instructions with the clamp
;     modifier. The two prefixes expect them in opposite order (v1 first under
;     GFX9, v0 first under GFX10PLUS), which is why the blocks are not shared.
186 define <3 x i16> @v_ssubsat_v3i16(<3 x i16> %lhs, <3 x i16> %rhs) {
187 ; GFX6-LABEL: v_ssubsat_v3i16:
189 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
190 ; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16
191 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
192 ; GFX6-NEXT: v_bfe_i32 v4, v4, 0, 16
193 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
194 ; GFX6-NEXT: v_bfe_i32 v5, v5, 0, 16
195 ; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16
196 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v4
197 ; GFX6-NEXT: s_movk_i32 s4, 0x8000
198 ; GFX6-NEXT: v_mov_b32_e32 v4, 0x7fff
199 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
200 ; GFX6-NEXT: v_med3_i32 v1, v1, s4, v4
201 ; GFX6-NEXT: v_med3_i32 v0, v0, s4, v4
202 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v5
203 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
204 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
205 ; GFX6-NEXT: v_med3_i32 v3, v2, s4, v4
206 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
207 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v3
208 ; GFX6-NEXT: v_alignbit_b32 v1, v3, v1, 16
209 ; GFX6-NEXT: s_setpc_b64 s[30:31]
211 ; GFX8-LABEL: v_ssubsat_v3i16:
213 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
214 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v2
215 ; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v0
216 ; GFX8-NEXT: v_sub_u16_e32 v6, v5, v4
217 ; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v6, v5
218 ; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], 0, v4
219 ; GFX8-NEXT: v_ashrrev_i16_e32 v4, 15, v6
220 ; GFX8-NEXT: v_xor_b32_e32 v4, 0xffff8000, v4
221 ; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
222 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
223 ; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v3
224 ; GFX8-NEXT: v_sub_u16_e32 v3, v1, v3
225 ; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v3, v1
226 ; GFX8-NEXT: v_ashrrev_i16_e32 v1, 15, v3
227 ; GFX8-NEXT: v_xor_b32_e32 v1, 0xffff8000, v1
228 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
229 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
230 ; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v2
231 ; GFX8-NEXT: v_sub_u16_e32 v2, v0, v2
232 ; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v2, v0
233 ; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v2
234 ; GFX8-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0
235 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
236 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
237 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v4
238 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
239 ; GFX8-NEXT: s_setpc_b64 s[30:31]
241 ; GFX9-LABEL: v_ssubsat_v3i16:
243 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
244 ; GFX9-NEXT: v_pk_sub_i16 v1, v1, v3 clamp
245 ; GFX9-NEXT: v_pk_sub_i16 v0, v0, v2 clamp
246 ; GFX9-NEXT: s_setpc_b64 s[30:31]
248 ; GFX10PLUS-LABEL: v_ssubsat_v3i16:
249 ; GFX10PLUS: ; %bb.0:
250 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
251 ; GFX10PLUS-NEXT: v_pk_sub_i16 v0, v0, v2 clamp
252 ; GFX10PLUS-NEXT: v_pk_sub_i16 v1, v1, v3 clamp
253 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
254 %result = call <3 x i16> @llvm.ssub.sat.v3i16(<3 x i16> %lhs, <3 x i16> %rhs)
255 ret <3 x i16> %result
; Signed saturating subtraction of two <4 x i16> vectors through
; @llvm.ssub.sat.v4i16. Unlike the neighbouring tests, the function is
; declared to return <2 x float>: the <4 x i16> result is bitcast before the
; ret, so the value is returned as two 32-bit lanes.
; What the expected code for each check prefix does:
;   * GFX6 checks: eight separate operand registers (v0..v7) are sign-extended
;     with v_bfe_i32; each element is subtracted and clamped with v_med3_i32
;     (0x8000 / 0x7fff), and each pair is packed with shift / and / or into
;     v0 and v1.
;   * GFX8 checks: four copies of the compare / subtract / select expansion,
;     with each pair of halves recombined through an SDWA v_or_b32.
;   * GFX9 and GFX10PLUS checks: two v_pk_sub_i16 instructions with the clamp
;     modifier, v0 first and then v1.
258 define <2 x float> @v_ssubsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
259 ; GFX6-LABEL: v_ssubsat_v4i16:
261 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
262 ; GFX6-NEXT: v_bfe_i32 v4, v4, 0, 16
263 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
264 ; GFX6-NEXT: v_bfe_i32 v5, v5, 0, 16
265 ; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
266 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
267 ; GFX6-NEXT: s_movk_i32 s4, 0x8000
268 ; GFX6-NEXT: v_mov_b32_e32 v5, 0x7fff
269 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
270 ; GFX6-NEXT: v_med3_i32 v1, v1, s4, v5
271 ; GFX6-NEXT: v_med3_i32 v0, v0, s4, v5
272 ; GFX6-NEXT: v_bfe_i32 v6, v6, 0, 16
273 ; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16
274 ; GFX6-NEXT: v_bfe_i32 v7, v7, 0, 16
275 ; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16
276 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
277 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
278 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
279 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v3, v7
280 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v6
281 ; GFX6-NEXT: v_med3_i32 v1, v1, s4, v5
282 ; GFX6-NEXT: v_med3_i32 v2, v2, s4, v5
283 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
284 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
285 ; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
286 ; GFX6-NEXT: s_setpc_b64 s[30:31]
288 ; GFX8-LABEL: v_ssubsat_v4i16:
290 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
291 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v2
292 ; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v0
293 ; GFX8-NEXT: v_sub_u16_e32 v6, v5, v4
294 ; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v6, v5
295 ; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], 0, v4
296 ; GFX8-NEXT: v_ashrrev_i16_e32 v4, 15, v6
297 ; GFX8-NEXT: v_xor_b32_e32 v4, 0xffff8000, v4
298 ; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
299 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
300 ; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v2
301 ; GFX8-NEXT: v_sub_u16_e32 v2, v0, v2
302 ; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v2, v0
303 ; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v2
304 ; GFX8-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0
305 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
306 ; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v4
307 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
308 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
309 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v3
310 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v1
311 ; GFX8-NEXT: v_sub_u16_e32 v5, v4, v2
312 ; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v5, v4
313 ; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], 0, v2
314 ; GFX8-NEXT: v_ashrrev_i16_e32 v2, 15, v5
315 ; GFX8-NEXT: v_xor_b32_e32 v2, 0xffff8000, v2
316 ; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
317 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
318 ; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v3
319 ; GFX8-NEXT: v_sub_u16_e32 v3, v1, v3
320 ; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v3, v1
321 ; GFX8-NEXT: v_ashrrev_i16_e32 v1, 15, v3
322 ; GFX8-NEXT: v_xor_b32_e32 v1, 0xffff8000, v1
323 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
324 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
325 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
326 ; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
327 ; GFX8-NEXT: s_setpc_b64 s[30:31]
329 ; GFX9-LABEL: v_ssubsat_v4i16:
331 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
332 ; GFX9-NEXT: v_pk_sub_i16 v0, v0, v2 clamp
333 ; GFX9-NEXT: v_pk_sub_i16 v1, v1, v3 clamp
334 ; GFX9-NEXT: s_setpc_b64 s[30:31]
336 ; GFX10PLUS-LABEL: v_ssubsat_v4i16:
337 ; GFX10PLUS: ; %bb.0:
338 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
339 ; GFX10PLUS-NEXT: v_pk_sub_i16 v0, v0, v2 clamp
340 ; GFX10PLUS-NEXT: v_pk_sub_i16 v1, v1, v3 clamp
341 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
342 %result = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
343 %cast = bitcast <4 x i16> %result to <2 x float>
344 ret <2 x float> %cast
; Signed saturating subtraction of two <2 x i32> vectors through
; @llvm.ssub.sat.v2i32; the intrinsic result is returned directly.
; What the expected code for each check prefix does:
;   * GFX6 and GFX8 checks: the scalar i32 expansion (two signed compares,
;     subtract, v_ashrrev_i32 by 31 xor 0x80000000, v_cndmask select) repeated
;     once per element, pairing v0 with v2 and v1 with v3.
;   * GFX9 checks: one v_sub_i32 with the clamp modifier per element.
;   * GFX10PLUS checks: one v_sub_nc_i32 with the clamp modifier per element.
347 define <2 x i32> @v_ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
348 ; GFX6-LABEL: v_ssubsat_v2i32:
350 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
351 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v2
352 ; GFX6-NEXT: v_sub_i32_e64 v2, s[4:5], v0, v2
353 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v0
354 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v2
355 ; GFX6-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
356 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
357 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
358 ; GFX6-NEXT: v_sub_i32_e64 v2, s[4:5], v1, v3
359 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v3
360 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v1
361 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v2
362 ; GFX6-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
363 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
364 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
365 ; GFX6-NEXT: s_setpc_b64 s[30:31]
367 ; GFX8-LABEL: v_ssubsat_v2i32:
369 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
370 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v2
371 ; GFX8-NEXT: v_sub_u32_e64 v2, s[4:5], v0, v2
372 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v0
373 ; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v2
374 ; GFX8-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
375 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
376 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
377 ; GFX8-NEXT: v_sub_u32_e64 v2, s[4:5], v1, v3
378 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v3
379 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v1
380 ; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v2
381 ; GFX8-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
382 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
383 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
384 ; GFX8-NEXT: s_setpc_b64 s[30:31]
386 ; GFX9-LABEL: v_ssubsat_v2i32:
388 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
389 ; GFX9-NEXT: v_sub_i32 v0, v0, v2 clamp
390 ; GFX9-NEXT: v_sub_i32 v1, v1, v3 clamp
391 ; GFX9-NEXT: s_setpc_b64 s[30:31]
393 ; GFX10PLUS-LABEL: v_ssubsat_v2i32:
394 ; GFX10PLUS: ; %bb.0:
395 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
396 ; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v2 clamp
397 ; GFX10PLUS-NEXT: v_sub_nc_i32 v1, v1, v3 clamp
398 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
399 %result = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
400 ret <2 x i32> %result
; Signed saturating subtraction of two <3 x i32> vectors through
; @llvm.ssub.sat.v3i32; the intrinsic result is returned directly.
; What the expected code for each check prefix does:
;   * GFX6 and GFX8 checks: the scalar i32 expansion (two signed compares,
;     subtract, v_ashrrev_i32 by 31 xor 0x80000000, v_cndmask select) repeated
;     once per element, pairing v0..v2 with v3..v5 and reusing v3 as the
;     temporary for every element.
;   * GFX9 checks: one v_sub_i32 with the clamp modifier per element.
;   * GFX10PLUS checks: one v_sub_nc_i32 with the clamp modifier per element.
403 define <3 x i32> @v_ssubsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
404 ; GFX6-LABEL: v_ssubsat_v3i32:
406 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
407 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v3
408 ; GFX6-NEXT: v_sub_i32_e64 v3, s[4:5], v0, v3
409 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v0
410 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v3
411 ; GFX6-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
412 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
413 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
414 ; GFX6-NEXT: v_sub_i32_e64 v3, s[4:5], v1, v4
415 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v4
416 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v1
417 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v3
418 ; GFX6-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
419 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
420 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
421 ; GFX6-NEXT: v_sub_i32_e64 v3, s[4:5], v2, v5
422 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v5
423 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v2
424 ; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v3
425 ; GFX6-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
426 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
427 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
428 ; GFX6-NEXT: s_setpc_b64 s[30:31]
430 ; GFX8-LABEL: v_ssubsat_v3i32:
432 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
433 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v3
434 ; GFX8-NEXT: v_sub_u32_e64 v3, s[4:5], v0, v3
435 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v0
436 ; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v3
437 ; GFX8-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
438 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
439 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
440 ; GFX8-NEXT: v_sub_u32_e64 v3, s[4:5], v1, v4
441 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v4
442 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v1
443 ; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v3
444 ; GFX8-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
445 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
446 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
447 ; GFX8-NEXT: v_sub_u32_e64 v3, s[4:5], v2, v5
448 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v5
449 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v2
450 ; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v3
451 ; GFX8-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
452 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
453 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
454 ; GFX8-NEXT: s_setpc_b64 s[30:31]
456 ; GFX9-LABEL: v_ssubsat_v3i32:
458 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
459 ; GFX9-NEXT: v_sub_i32 v0, v0, v3 clamp
460 ; GFX9-NEXT: v_sub_i32 v1, v1, v4 clamp
461 ; GFX9-NEXT: v_sub_i32 v2, v2, v5 clamp
462 ; GFX9-NEXT: s_setpc_b64 s[30:31]
464 ; GFX10PLUS-LABEL: v_ssubsat_v3i32:
465 ; GFX10PLUS: ; %bb.0:
466 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
467 ; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v3 clamp
468 ; GFX10PLUS-NEXT: v_sub_nc_i32 v1, v1, v4 clamp
469 ; GFX10PLUS-NEXT: v_sub_nc_i32 v2, v2, v5 clamp
470 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
471 %result = call <3 x i32> @llvm.ssub.sat.v3i32(<3 x i32> %lhs, <3 x i32> %rhs)
472 ret <3 x i32> %result
; Signed saturating subtraction of two <4 x i32> vectors through
; @llvm.ssub.sat.v4i32; the intrinsic result is returned directly.
; What the expected code for each check prefix does:
;   * GFX6 and GFX8 checks: the scalar i32 expansion (two signed compares,
;     subtract, v_ashrrev_i32 by 31 xor 0x80000000, v_cndmask select) repeated
;     once per element, pairing v0..v3 with v4..v7 and reusing v4 as the
;     temporary for every element.
;   * GFX9 checks: one v_sub_i32 with the clamp modifier per element.
;   * GFX10PLUS checks: one v_sub_nc_i32 with the clamp modifier per element.
475 define <4 x i32> @v_ssubsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
476 ; GFX6-LABEL: v_ssubsat_v4i32:
478 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
479 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v4
480 ; GFX6-NEXT: v_sub_i32_e64 v4, s[4:5], v0, v4
481 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v0
482 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v4
483 ; GFX6-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
484 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
485 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
486 ; GFX6-NEXT: v_sub_i32_e64 v4, s[4:5], v1, v5
487 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v5
488 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v1
489 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v4
490 ; GFX6-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
491 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
492 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
493 ; GFX6-NEXT: v_sub_i32_e64 v4, s[4:5], v2, v6
494 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v6
495 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v2
496 ; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v4
497 ; GFX6-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
498 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
499 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
500 ; GFX6-NEXT: v_sub_i32_e64 v4, s[4:5], v3, v7
501 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v7
502 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v3
503 ; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v4
504 ; GFX6-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
505 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
506 ; GFX6-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
507 ; GFX6-NEXT: s_setpc_b64 s[30:31]
509 ; GFX8-LABEL: v_ssubsat_v4i32:
511 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
512 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v4
513 ; GFX8-NEXT: v_sub_u32_e64 v4, s[4:5], v0, v4
514 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v0
515 ; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v4
516 ; GFX8-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
517 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
518 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
519 ; GFX8-NEXT: v_sub_u32_e64 v4, s[4:5], v1, v5
520 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v5
521 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v1
522 ; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v4
523 ; GFX8-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
524 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
525 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
526 ; GFX8-NEXT: v_sub_u32_e64 v4, s[4:5], v2, v6
527 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v6
528 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v2
529 ; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v4
530 ; GFX8-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
531 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
532 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
533 ; GFX8-NEXT: v_sub_u32_e64 v4, s[4:5], v3, v7
534 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v7
535 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v3
536 ; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v4
537 ; GFX8-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
538 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
539 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
540 ; GFX8-NEXT: s_setpc_b64 s[30:31]
542 ; GFX9-LABEL: v_ssubsat_v4i32:
544 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
545 ; GFX9-NEXT: v_sub_i32 v0, v0, v4 clamp
546 ; GFX9-NEXT: v_sub_i32 v1, v1, v5 clamp
547 ; GFX9-NEXT: v_sub_i32 v2, v2, v6 clamp
548 ; GFX9-NEXT: v_sub_i32 v3, v3, v7 clamp
549 ; GFX9-NEXT: s_setpc_b64 s[30:31]
551 ; GFX10PLUS-LABEL: v_ssubsat_v4i32:
552 ; GFX10PLUS: ; %bb.0:
553 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
554 ; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v4 clamp
555 ; GFX10PLUS-NEXT: v_sub_nc_i32 v1, v1, v5 clamp
556 ; GFX10PLUS-NEXT: v_sub_nc_i32 v2, v2, v6 clamp
557 ; GFX10PLUS-NEXT: v_sub_nc_i32 v3, v3, v7 clamp
558 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
559 %result = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
560 ret <4 x i32> %result
; Signed saturating subtraction of two <8 x i32> vectors through
; @llvm.ssub.sat.v8i32; the intrinsic result is returned directly.
; What the expected code for each check prefix does:
;   * GFX6 and GFX8 checks: the scalar i32 expansion (two signed compares,
;     subtract, v_ashrrev_i32 by 31 xor 0x80000000, v_cndmask select) repeated
;     once per element, pairing v0..v7 with v8..v15 and reusing v8 as the
;     temporary for every element.
;   * GFX9 checks: one v_sub_i32 with the clamp modifier per element.
;   * GFX10PLUS checks: one v_sub_nc_i32 with the clamp modifier per element.
563 define <8 x i32> @v_ssubsat_v8i32(<8 x i32> %lhs, <8 x i32> %rhs) {
564 ; GFX6-LABEL: v_ssubsat_v8i32:
566 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
567 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v8
568 ; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v0, v8
569 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v0
570 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v8
571 ; GFX6-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
572 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
573 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
574 ; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v1, v9
575 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v9
576 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v1
577 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v8
578 ; GFX6-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
579 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
580 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
581 ; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v2, v10
582 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v10
583 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v2
584 ; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v8
585 ; GFX6-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
586 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
587 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
588 ; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v3, v11
589 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v11
590 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v3
591 ; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v8
592 ; GFX6-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
593 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
594 ; GFX6-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
595 ; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v4, v12
596 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v12
597 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v4
598 ; GFX6-NEXT: v_ashrrev_i32_e32 v4, 31, v8
599 ; GFX6-NEXT: v_xor_b32_e32 v4, 0x80000000, v4
600 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
601 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
602 ; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v5, v13
603 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v13
604 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v5
605 ; GFX6-NEXT: v_ashrrev_i32_e32 v5, 31, v8
606 ; GFX6-NEXT: v_xor_b32_e32 v5, 0x80000000, v5
607 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
608 ; GFX6-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
609 ; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v6, v14
610 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v14
611 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v6
612 ; GFX6-NEXT: v_ashrrev_i32_e32 v6, 31, v8
613 ; GFX6-NEXT: v_xor_b32_e32 v6, 0x80000000, v6
614 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
615 ; GFX6-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
616 ; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v7, v15
617 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v15
618 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v7
619 ; GFX6-NEXT: v_ashrrev_i32_e32 v7, 31, v8
620 ; GFX6-NEXT: v_xor_b32_e32 v7, 0x80000000, v7
621 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
622 ; GFX6-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
623 ; GFX6-NEXT: s_setpc_b64 s[30:31]
625 ; GFX8-LABEL: v_ssubsat_v8i32:
627 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
628 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v8
629 ; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v0, v8
630 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v0
631 ; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v8
632 ; GFX8-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
633 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
634 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
635 ; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v1, v9
636 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v9
637 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v1
638 ; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v8
639 ; GFX8-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
640 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
641 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
642 ; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v2, v10
643 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v10
644 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v2
645 ; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v8
646 ; GFX8-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
647 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
648 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
649 ; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v3, v11
650 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v11
651 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v3
652 ; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v8
653 ; GFX8-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
654 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
655 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
656 ; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v4, v12
657 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v12
658 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v4
659 ; GFX8-NEXT: v_ashrrev_i32_e32 v4, 31, v8
660 ; GFX8-NEXT: v_xor_b32_e32 v4, 0x80000000, v4
661 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
662 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
663 ; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v5, v13
664 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v13
665 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v5
666 ; GFX8-NEXT: v_ashrrev_i32_e32 v5, 31, v8
667 ; GFX8-NEXT: v_xor_b32_e32 v5, 0x80000000, v5
668 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
669 ; GFX8-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
670 ; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v6, v14
671 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v14
672 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v6
673 ; GFX8-NEXT: v_ashrrev_i32_e32 v6, 31, v8
674 ; GFX8-NEXT: v_xor_b32_e32 v6, 0x80000000, v6
675 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
676 ; GFX8-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
677 ; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v7, v15
678 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v15
679 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v7
680 ; GFX8-NEXT: v_ashrrev_i32_e32 v7, 31, v8
681 ; GFX8-NEXT: v_xor_b32_e32 v7, 0x80000000, v7
682 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
683 ; GFX8-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
684 ; GFX8-NEXT: s_setpc_b64 s[30:31]
686 ; GFX9-LABEL: v_ssubsat_v8i32:
688 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
689 ; GFX9-NEXT: v_sub_i32 v0, v0, v8 clamp
690 ; GFX9-NEXT: v_sub_i32 v1, v1, v9 clamp
691 ; GFX9-NEXT: v_sub_i32 v2, v2, v10 clamp
692 ; GFX9-NEXT: v_sub_i32 v3, v3, v11 clamp
693 ; GFX9-NEXT: v_sub_i32 v4, v4, v12 clamp
694 ; GFX9-NEXT: v_sub_i32 v5, v5, v13 clamp
695 ; GFX9-NEXT: v_sub_i32 v6, v6, v14 clamp
696 ; GFX9-NEXT: v_sub_i32 v7, v7, v15 clamp
697 ; GFX9-NEXT: s_setpc_b64 s[30:31]
699 ; GFX10PLUS-LABEL: v_ssubsat_v8i32:
700 ; GFX10PLUS: ; %bb.0:
701 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
702 ; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v8 clamp
703 ; GFX10PLUS-NEXT: v_sub_nc_i32 v1, v1, v9 clamp
704 ; GFX10PLUS-NEXT: v_sub_nc_i32 v2, v2, v10 clamp
705 ; GFX10PLUS-NEXT: v_sub_nc_i32 v3, v3, v11 clamp
706 ; GFX10PLUS-NEXT: v_sub_nc_i32 v4, v4, v12 clamp
707 ; GFX10PLUS-NEXT: v_sub_nc_i32 v5, v5, v13 clamp
708 ; GFX10PLUS-NEXT: v_sub_nc_i32 v6, v6, v14 clamp
709 ; GFX10PLUS-NEXT: v_sub_nc_i32 v7, v7, v15 clamp
710 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
711 %result = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> %lhs, <8 x i32> %rhs)
712 ret <8 x i32> %result
715 define <16 x i32> @v_ssubsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
716 ; GFX6-LABEL: v_ssubsat_v16i32:
718 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
719 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v16
720 ; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v0, v16
721 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v0
722 ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v16
723 ; GFX6-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
724 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
725 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
726 ; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v1, v17
727 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v17
728 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v1
729 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v16
730 ; GFX6-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
731 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
732 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
733 ; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v2, v18
734 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v18
735 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v2
736 ; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v16
737 ; GFX6-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
738 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
739 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
740 ; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v3, v19
741 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v19
742 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v3
743 ; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v16
744 ; GFX6-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
745 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
746 ; GFX6-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
747 ; GFX6-NEXT: buffer_load_dword v16, off, s[0:3], s32
748 ; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v4, v20
749 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v20
750 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v4
751 ; GFX6-NEXT: v_ashrrev_i32_e32 v4, 31, v17
752 ; GFX6-NEXT: v_xor_b32_e32 v4, 0x80000000, v4
753 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
754 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v17, v4, vcc
755 ; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v5, v21
756 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v21
757 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v5
758 ; GFX6-NEXT: v_ashrrev_i32_e32 v5, 31, v17
759 ; GFX6-NEXT: v_xor_b32_e32 v5, 0x80000000, v5
760 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
761 ; GFX6-NEXT: v_cndmask_b32_e32 v5, v17, v5, vcc
762 ; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v6, v22
763 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v22
764 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v6
765 ; GFX6-NEXT: v_ashrrev_i32_e32 v6, 31, v17
766 ; GFX6-NEXT: v_xor_b32_e32 v6, 0x80000000, v6
767 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
768 ; GFX6-NEXT: v_cndmask_b32_e32 v6, v17, v6, vcc
769 ; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v7, v23
770 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v23
771 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v7
772 ; GFX6-NEXT: v_ashrrev_i32_e32 v7, 31, v17
773 ; GFX6-NEXT: v_xor_b32_e32 v7, 0x80000000, v7
774 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
775 ; GFX6-NEXT: v_cndmask_b32_e32 v7, v17, v7, vcc
776 ; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v8, v24
777 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v24
778 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v8
779 ; GFX6-NEXT: v_ashrrev_i32_e32 v8, 31, v17
780 ; GFX6-NEXT: v_xor_b32_e32 v8, 0x80000000, v8
781 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
782 ; GFX6-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
783 ; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v9, v25
784 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v25
785 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v9
786 ; GFX6-NEXT: v_ashrrev_i32_e32 v9, 31, v17
787 ; GFX6-NEXT: v_xor_b32_e32 v9, 0x80000000, v9
788 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
789 ; GFX6-NEXT: v_cndmask_b32_e32 v9, v17, v9, vcc
790 ; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v10, v26
791 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v26
792 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v10
793 ; GFX6-NEXT: v_ashrrev_i32_e32 v10, 31, v17
794 ; GFX6-NEXT: v_xor_b32_e32 v10, 0x80000000, v10
795 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
796 ; GFX6-NEXT: v_cndmask_b32_e32 v10, v17, v10, vcc
797 ; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v11, v27
798 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v27
799 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v11
800 ; GFX6-NEXT: v_ashrrev_i32_e32 v11, 31, v17
801 ; GFX6-NEXT: v_xor_b32_e32 v11, 0x80000000, v11
802 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
803 ; GFX6-NEXT: v_cndmask_b32_e32 v11, v17, v11, vcc
804 ; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v12, v28
805 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v28
806 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v12
807 ; GFX6-NEXT: v_ashrrev_i32_e32 v12, 31, v17
808 ; GFX6-NEXT: v_xor_b32_e32 v12, 0x80000000, v12
809 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
810 ; GFX6-NEXT: v_cndmask_b32_e32 v12, v17, v12, vcc
811 ; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v13, v29
812 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v29
813 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v13
814 ; GFX6-NEXT: v_ashrrev_i32_e32 v13, 31, v17
815 ; GFX6-NEXT: v_xor_b32_e32 v13, 0x80000000, v13
816 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
817 ; GFX6-NEXT: v_cndmask_b32_e32 v13, v17, v13, vcc
818 ; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v14, v30
819 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v30
820 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v14
821 ; GFX6-NEXT: v_ashrrev_i32_e32 v14, 31, v17
822 ; GFX6-NEXT: v_xor_b32_e32 v14, 0x80000000, v14
823 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
824 ; GFX6-NEXT: v_cndmask_b32_e32 v14, v17, v14, vcc
825 ; GFX6-NEXT: s_waitcnt vmcnt(0)
826 ; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v16
827 ; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v15, v16
828 ; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v15
829 ; GFX6-NEXT: v_ashrrev_i32_e32 v15, 31, v16
830 ; GFX6-NEXT: v_xor_b32_e32 v15, 0x80000000, v15
831 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
832 ; GFX6-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
833 ; GFX6-NEXT: s_setpc_b64 s[30:31]
835 ; GFX8-LABEL: v_ssubsat_v16i32:
837 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
838 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v16
839 ; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v0, v16
840 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v0
841 ; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v16
842 ; GFX8-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
843 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
844 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
845 ; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v1, v17
846 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v17
847 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v1
848 ; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v16
849 ; GFX8-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
850 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
851 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
852 ; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v2, v18
853 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v18
854 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v2
855 ; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v16
856 ; GFX8-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
857 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
858 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
859 ; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v3, v19
860 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v19
861 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v3
862 ; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v16
863 ; GFX8-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
864 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
865 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
866 ; GFX8-NEXT: buffer_load_dword v16, off, s[0:3], s32
867 ; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v4, v20
868 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v20
869 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v4
870 ; GFX8-NEXT: v_ashrrev_i32_e32 v4, 31, v17
871 ; GFX8-NEXT: v_xor_b32_e32 v4, 0x80000000, v4
872 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
873 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v17, v4, vcc
874 ; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v5, v21
875 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v21
876 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v5
877 ; GFX8-NEXT: v_ashrrev_i32_e32 v5, 31, v17
878 ; GFX8-NEXT: v_xor_b32_e32 v5, 0x80000000, v5
879 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
880 ; GFX8-NEXT: v_cndmask_b32_e32 v5, v17, v5, vcc
881 ; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v6, v22
882 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v22
883 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v6
884 ; GFX8-NEXT: v_ashrrev_i32_e32 v6, 31, v17
885 ; GFX8-NEXT: v_xor_b32_e32 v6, 0x80000000, v6
886 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
887 ; GFX8-NEXT: v_cndmask_b32_e32 v6, v17, v6, vcc
888 ; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v7, v23
889 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v23
890 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v7
891 ; GFX8-NEXT: v_ashrrev_i32_e32 v7, 31, v17
892 ; GFX8-NEXT: v_xor_b32_e32 v7, 0x80000000, v7
893 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
894 ; GFX8-NEXT: v_cndmask_b32_e32 v7, v17, v7, vcc
895 ; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v8, v24
896 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v24
897 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v8
898 ; GFX8-NEXT: v_ashrrev_i32_e32 v8, 31, v17
899 ; GFX8-NEXT: v_xor_b32_e32 v8, 0x80000000, v8
900 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
901 ; GFX8-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc
902 ; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v9, v25
903 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v25
904 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v9
905 ; GFX8-NEXT: v_ashrrev_i32_e32 v9, 31, v17
906 ; GFX8-NEXT: v_xor_b32_e32 v9, 0x80000000, v9
907 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
908 ; GFX8-NEXT: v_cndmask_b32_e32 v9, v17, v9, vcc
909 ; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v10, v26
910 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v26
911 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v10
912 ; GFX8-NEXT: v_ashrrev_i32_e32 v10, 31, v17
913 ; GFX8-NEXT: v_xor_b32_e32 v10, 0x80000000, v10
914 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
915 ; GFX8-NEXT: v_cndmask_b32_e32 v10, v17, v10, vcc
916 ; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v11, v27
917 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v27
918 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v11
919 ; GFX8-NEXT: v_ashrrev_i32_e32 v11, 31, v17
920 ; GFX8-NEXT: v_xor_b32_e32 v11, 0x80000000, v11
921 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
922 ; GFX8-NEXT: v_cndmask_b32_e32 v11, v17, v11, vcc
923 ; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v12, v28
924 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v28
925 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v12
926 ; GFX8-NEXT: v_ashrrev_i32_e32 v12, 31, v17
927 ; GFX8-NEXT: v_xor_b32_e32 v12, 0x80000000, v12
928 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
929 ; GFX8-NEXT: v_cndmask_b32_e32 v12, v17, v12, vcc
930 ; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v13, v29
931 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v29
932 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v13
933 ; GFX8-NEXT: v_ashrrev_i32_e32 v13, 31, v17
934 ; GFX8-NEXT: v_xor_b32_e32 v13, 0x80000000, v13
935 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
936 ; GFX8-NEXT: v_cndmask_b32_e32 v13, v17, v13, vcc
937 ; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v14, v30
938 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v30
939 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v14
940 ; GFX8-NEXT: v_ashrrev_i32_e32 v14, 31, v17
941 ; GFX8-NEXT: v_xor_b32_e32 v14, 0x80000000, v14
942 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
943 ; GFX8-NEXT: v_cndmask_b32_e32 v14, v17, v14, vcc
944 ; GFX8-NEXT: s_waitcnt vmcnt(0)
945 ; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v16
946 ; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v15, v16
947 ; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v15
948 ; GFX8-NEXT: v_ashrrev_i32_e32 v15, 31, v16
949 ; GFX8-NEXT: v_xor_b32_e32 v15, 0x80000000, v15
950 ; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
951 ; GFX8-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
952 ; GFX8-NEXT: s_setpc_b64 s[30:31]
954 ; GFX9-LABEL: v_ssubsat_v16i32:
956 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
957 ; GFX9-NEXT: v_sub_i32 v0, v0, v16 clamp
958 ; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32
959 ; GFX9-NEXT: v_sub_i32 v1, v1, v17 clamp
960 ; GFX9-NEXT: v_sub_i32 v2, v2, v18 clamp
961 ; GFX9-NEXT: v_sub_i32 v3, v3, v19 clamp
962 ; GFX9-NEXT: v_sub_i32 v4, v4, v20 clamp
963 ; GFX9-NEXT: v_sub_i32 v5, v5, v21 clamp
964 ; GFX9-NEXT: v_sub_i32 v6, v6, v22 clamp
965 ; GFX9-NEXT: v_sub_i32 v7, v7, v23 clamp
966 ; GFX9-NEXT: v_sub_i32 v8, v8, v24 clamp
967 ; GFX9-NEXT: v_sub_i32 v9, v9, v25 clamp
968 ; GFX9-NEXT: v_sub_i32 v10, v10, v26 clamp
969 ; GFX9-NEXT: v_sub_i32 v11, v11, v27 clamp
970 ; GFX9-NEXT: v_sub_i32 v12, v12, v28 clamp
971 ; GFX9-NEXT: v_sub_i32 v13, v13, v29 clamp
972 ; GFX9-NEXT: v_sub_i32 v14, v14, v30 clamp
973 ; GFX9-NEXT: s_waitcnt vmcnt(0)
974 ; GFX9-NEXT: v_sub_i32 v15, v15, v16 clamp
975 ; GFX9-NEXT: s_setpc_b64 s[30:31]
977 ; GFX10-LABEL: v_ssubsat_v16i32:
979 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
980 ; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
981 ; GFX10-NEXT: v_sub_nc_i32 v0, v0, v16 clamp
982 ; GFX10-NEXT: v_sub_nc_i32 v1, v1, v17 clamp
983 ; GFX10-NEXT: v_sub_nc_i32 v2, v2, v18 clamp
984 ; GFX10-NEXT: v_sub_nc_i32 v3, v3, v19 clamp
985 ; GFX10-NEXT: v_sub_nc_i32 v4, v4, v20 clamp
986 ; GFX10-NEXT: v_sub_nc_i32 v5, v5, v21 clamp
987 ; GFX10-NEXT: v_sub_nc_i32 v6, v6, v22 clamp
988 ; GFX10-NEXT: v_sub_nc_i32 v7, v7, v23 clamp
989 ; GFX10-NEXT: v_sub_nc_i32 v8, v8, v24 clamp
990 ; GFX10-NEXT: v_sub_nc_i32 v9, v9, v25 clamp
991 ; GFX10-NEXT: v_sub_nc_i32 v10, v10, v26 clamp
992 ; GFX10-NEXT: v_sub_nc_i32 v11, v11, v27 clamp
993 ; GFX10-NEXT: v_sub_nc_i32 v12, v12, v28 clamp
994 ; GFX10-NEXT: v_sub_nc_i32 v13, v13, v29 clamp
995 ; GFX10-NEXT: v_sub_nc_i32 v14, v14, v30 clamp
996 ; GFX10-NEXT: s_waitcnt vmcnt(0)
997 ; GFX10-NEXT: v_sub_nc_i32 v15, v15, v31 clamp
998 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1000 ; GFX11-LABEL: v_ssubsat_v16i32:
1002 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1003 ; GFX11-NEXT: scratch_load_b32 v31, off, s32
1004 ; GFX11-NEXT: v_sub_nc_i32 v0, v0, v16 clamp
1005 ; GFX11-NEXT: v_sub_nc_i32 v1, v1, v17 clamp
1006 ; GFX11-NEXT: v_sub_nc_i32 v2, v2, v18 clamp
1007 ; GFX11-NEXT: v_sub_nc_i32 v3, v3, v19 clamp
1008 ; GFX11-NEXT: v_sub_nc_i32 v4, v4, v20 clamp
1009 ; GFX11-NEXT: v_sub_nc_i32 v5, v5, v21 clamp
1010 ; GFX11-NEXT: v_sub_nc_i32 v6, v6, v22 clamp
1011 ; GFX11-NEXT: v_sub_nc_i32 v7, v7, v23 clamp
1012 ; GFX11-NEXT: v_sub_nc_i32 v8, v8, v24 clamp
1013 ; GFX11-NEXT: v_sub_nc_i32 v9, v9, v25 clamp
1014 ; GFX11-NEXT: v_sub_nc_i32 v10, v10, v26 clamp
1015 ; GFX11-NEXT: v_sub_nc_i32 v11, v11, v27 clamp
1016 ; GFX11-NEXT: v_sub_nc_i32 v12, v12, v28 clamp
1017 ; GFX11-NEXT: v_sub_nc_i32 v13, v13, v29 clamp
1018 ; GFX11-NEXT: v_sub_nc_i32 v14, v14, v30 clamp
1019 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1020 ; GFX11-NEXT: v_sub_nc_i32 v15, v15, v31 clamp
1021 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1022 %result = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> %lhs, <16 x i32> %rhs)
1023 ret <16 x i32> %result
; Codegen test for a scalar i64 call to @llvm.ssub.sat.i64(%lhs, %rhs); the
; call's value is bound to %result.
;
; There is one group of assertions per FileCheck prefix (GFX6, GFX8, GFX9,
; GFX10, GFX11). Each group expects the same overall shape:
;   * a two-instruction subtract writing v4 and v5, chained through vcc/vcc_lo;
;   * a compare of v[4:5] against v[0:1] and a compare of 0 against v[2:3];
;   * an s_xor of the two compare results into vcc/vcc_lo;
;   * two v_cndmask selects on that mask (written as a single
;     v_dual_cndmask_b32 line for GFX11) choosing between v4/v5 and values
;     derived from `v_ashrrev_i32 ..., 31, v5`.
; The GFX10/GFX11 groups use vcc_lo and s_xor_b32 where GFX6-GFX9 use vcc and
; s_xor_b64.
;
; NOTE(review): per the file header these assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
1027 define i64 @v_ssubsat_i64(i64 %lhs, i64 %rhs) {
1028 ; GFX6-LABEL: v_ssubsat_i64:
1030 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1031 ; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
1032 ; GFX6-NEXT: v_subb_u32_e32 v5, vcc, v1, v3, vcc
1033 ; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
1034 ; GFX6-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[2:3]
1035 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v5
1036 ; GFX6-NEXT: s_xor_b64 vcc, s[4:5], vcc
1037 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
1038 ; GFX6-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
1039 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
1040 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1042 ; GFX8-LABEL: v_ssubsat_i64:
1044 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1045 ; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v0, v2
1046 ; GFX8-NEXT: v_subb_u32_e32 v5, vcc, v1, v3, vcc
1047 ; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
1048 ; GFX8-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[2:3]
1049 ; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v5
1050 ; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
1051 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
1052 ; GFX8-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
1053 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
1054 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1056 ; GFX9-LABEL: v_ssubsat_i64:
1058 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1059 ; GFX9-NEXT: v_sub_co_u32_e32 v4, vcc, v0, v2
1060 ; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v1, v3, vcc
1061 ; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
1062 ; GFX9-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[2:3]
1063 ; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v5
1064 ; GFX9-NEXT: s_xor_b64 vcc, s[4:5], vcc
1065 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
1066 ; GFX9-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
1067 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
1068 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1070 ; GFX10-LABEL: v_ssubsat_i64:
1072 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1073 ; GFX10-NEXT: v_sub_co_u32 v4, vcc_lo, v0, v2
1074 ; GFX10-NEXT: v_sub_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
1075 ; GFX10-NEXT: v_cmp_lt_i64_e64 s4, 0, v[2:3]
1076 ; GFX10-NEXT: v_ashrrev_i32_e32 v6, 31, v5
1077 ; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[4:5], v[0:1]
1078 ; GFX10-NEXT: v_xor_b32_e32 v1, 0x80000000, v6
1079 ; GFX10-NEXT: s_xor_b32 vcc_lo, s4, vcc_lo
1080 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v6, vcc_lo
1081 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
1082 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1084 ; GFX11-LABEL: v_ssubsat_i64:
1086 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1087 ; GFX11-NEXT: v_sub_co_u32 v4, vcc_lo, v0, v2
1088 ; GFX11-NEXT: v_sub_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
1089 ; GFX11-NEXT: v_cmp_lt_i64_e64 s0, 0, v[2:3]
1090 ; GFX11-NEXT: v_ashrrev_i32_e32 v6, 31, v5
1091 ; GFX11-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[4:5], v[0:1]
1092 ; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v6
1093 ; GFX11-NEXT: s_xor_b32 vcc_lo, s0, vcc_lo
1094 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v4, v6 :: v_dual_cndmask_b32 v1, v5, v1
1095 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1096 %result = call i64 @llvm.ssub.sat.i64(i64 %lhs, i64 %rhs)
; Declarations of the @llvm.ssub.sat intrinsic overloads: scalar i8, i16, i32
; and i64, plus the <2|3|4 x i16> and <2|3|4|8|16 x i32> vector forms. Each
; takes two operands of the overloaded type and returns that same type, and
; each refers to attribute group #0 defined on the last line.
; NOTE(review): presumably there is one test function per overload; confirm
; against the rest of the file before removing a declaration.
1100 declare i8 @llvm.ssub.sat.i8(i8, i8) #0
1101 declare i16 @llvm.ssub.sat.i16(i16, i16) #0
1102 declare <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16>, <2 x i16>) #0
1103 declare <3 x i16> @llvm.ssub.sat.v3i16(<3 x i16>, <3 x i16>) #0
1104 declare <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16>, <4 x i16>) #0
1105 declare i32 @llvm.ssub.sat.i32(i32, i32) #0
1106 declare <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32>, <2 x i32>) #0
1107 declare <3 x i32> @llvm.ssub.sat.v3i32(<3 x i32>, <3 x i32>) #0
1108 declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>) #0
1109 declare <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32>, <8 x i32>) #0
1110 declare <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32>, <16 x i32>) #0
1111 declare i64 @llvm.ssub.sat.i64(i64, i64) #0
; Attribute group shared by every declaration above.
1113 attributes #0 = { nounwind readnone speculatable willreturn }